Skip to content

Commit

Permalink
Support awkward, sparse structures in sync.copy
Browse files Browse the repository at this point in the history
  • Loading branch information
danielballan committed May 7, 2024
1 parent 56338b9 commit 49ec654
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 21 deletions.
17 changes: 17 additions & 0 deletions tiled/_tests/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
import contextlib
import tempfile

import awkward
import h5py
import numpy
import pandas
import sparse
import tifffile

from tiled.catalog import in_memory
Expand Down Expand Up @@ -43,6 +45,9 @@ def populate_external(client, tmp_path):
with h5py.File(filepath, "w") as file:
g = file.create_group("g")
g["data"] = numpy.arange(3)
# Note: Tiled does not currently happen to support any formats that it
# identifies as 'awkward' or 'sparse'. Potentially it could, and this
# test could be expanded to include those examples.
asyncio.run(register(client, tmp_path))


Expand All @@ -53,12 +58,24 @@ def populate_internal(client):
# table
df = pandas.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
client.write_dataframe(df, key="b", metadata={"color": "green"}, specs=["beta"])
# awkward
client.write_awkward(
awkward.Array([1, [2, 3]]), key="d", metadata={"color": "red"}, specs=["alpha"]
)
# sparse
coo = sparse.COO(coords=[[2, 5]], data=[1.3, 7.5], shape=(10,))
client.write_sparse(key="e", coords=coo.coords, data=coo.data, shape=coo.shape)

# nested
container = client.create_container("c")
container.write_array(
[1, 2, 3], key="A", metadata={"color": "red"}, specs=["alpha"]
)
container.write_dataframe(df, key="B", metadata={"color": "green"}, specs=["beta"])
container.write_awkward(
awkward.Array([1, [2, 3]]), key="D", metadata={"color": "red"}, specs=["alpha"]
)
container.write_sparse(key="E", coords=coo.coords, data=coo.data, shape=coo.shape)


def test_copy_internal():
Expand Down
23 changes: 2 additions & 21 deletions tiled/client/base.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import importlib
import time
import warnings
from dataclasses import asdict
from pathlib import Path

from httpx import URL

from ..structures.core import Spec, StructureFamily
from ..structures.core import STRUCTURE_TYPES, Spec, StructureFamily
from ..structures.data_source import DataSource
from ..utils import UNCHANGED, DictView, ListView, OneShotCachedMap, safe_json_dump
from ..utils import UNCHANGED, DictView, ListView, safe_json_dump
from .utils import MSGPACK_MIME_TYPE, handle_error


Expand Down Expand Up @@ -437,21 +436,3 @@ def delete_tree(self):

def __dask_tokenize__(self):
return (type(self), self.uri)


STRUCTURE_TYPES = OneShotCachedMap(
{
StructureFamily.array: lambda: importlib.import_module(
"...structures.array", BaseClient.__module__
).ArrayStructure,
StructureFamily.awkward: lambda: importlib.import_module(
"...structures.awkward", BaseClient.__module__
).AwkwardStructure,
StructureFamily.table: lambda: importlib.import_module(
"...structures.table", BaseClient.__module__
).TableStructure,
StructureFamily.sparse: lambda: importlib.import_module(
"...structures.sparse", BaseClient.__module__
).SparseStructure,
}
)
9 changes: 9 additions & 0 deletions tiled/client/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,15 @@ def new(
# Merge in "id" and "links" returned by the server.
item.update(document)

# Ensure this is a dataclass, not a dict.
# When we apply type hints and mypy to the client it should be possible
# to dispense with this.
if (structure_family != StructureFamily.container) and isinstance(
structure, dict
):
structure_type = STRUCTURE_TYPES[structure_family]
structure = structure_type.from_json(structure)

return client_for_item(
self.context,
self.structure_clients,
Expand Down
18 changes: 18 additions & 0 deletions tiled/client/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,22 @@ def _copy_array(source, dest):
dest.write_block(array, block)


def _copy_awkward(source, dest):
import awkward

array = source.read()
_form, _length, container = awkward.to_buffers(array)
dest.write(container)


def _copy_sparse(source, dest):
num_blocks = (range(len(n)) for n in source.chunks)
# Loop over each block index --- e.g. (0, 0), (0, 1), (0, 2) ....
for block in itertools.product(*num_blocks):
array = source.read_block(block)
dest.write_block(array.coords, array.data, block)


def _copy_table(source, dest):
for partition in range(source.structure().npartitions):
df = source.read_partition(partition)
Expand Down Expand Up @@ -111,6 +127,8 @@ def _copy_container(source, dest):

_DISPATCH = {
StructureFamily.array: _copy_array,
StructureFamily.awkward: _copy_awkward,
StructureFamily.container: _copy_container,
StructureFamily.sparse: _copy_sparse,
StructureFamily.table: _copy_table,
}
21 changes: 21 additions & 0 deletions tiled/structures/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
"""

import enum
import importlib
from dataclasses import asdict, dataclass
from typing import Dict, Optional

from ..utils import OneShotCachedMap


class StructureFamily(str, enum.Enum):
array = "array"
Expand Down Expand Up @@ -41,3 +44,21 @@ def dict(self) -> Dict[str, Optional[str]]:
return asdict(self)

model_dump = dict # For easy interoperability with pydantic 2.x models


STRUCTURE_TYPES = OneShotCachedMap(
{
StructureFamily.array: lambda: importlib.import_module(
"...structures.array", StructureFamily.__module__
).ArrayStructure,
StructureFamily.awkward: lambda: importlib.import_module(
"...structures.awkward", StructureFamily.__module__
).AwkwardStructure,
StructureFamily.table: lambda: importlib.import_module(
"...structures.table", StructureFamily.__module__
).TableStructure,
StructureFamily.sparse: lambda: importlib.import_module(
"...structures.sparse", StructureFamily.__module__
).SparseStructure,
}
)

0 comments on commit 49ec654

Please sign in to comment.