Skip to content

Commit

Permalink
Create python Group base class for Collection (#3024)
Browse files Browse the repository at this point in the history
This change moves functionality from the python `Collection` class into
a `Group` class. This is a precursor for the new `MultiscaleImage` class
that is a TileDB group, but not a SOMA `Collection`
  • Loading branch information
jp-dark authored Sep 23, 2024
1 parent 415f1a2 commit 7a67820
Show file tree
Hide file tree
Showing 3 changed files with 324 additions and 235 deletions.
239 changes: 6 additions & 233 deletions apis/python/src/tiledbsoma/_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,19 @@
from __future__ import annotations

import itertools
import re
from typing import (
Any,
Callable,
ClassVar,
Dict,
Iterable,
Iterator,
Optional,
Set,
Tuple,
Type,
TypeVar,
cast,
overload,
)

import attrs
import somacore
import somacore.collection
from somacore import options
Expand All @@ -38,18 +33,13 @@
from ._dense_nd_array import DenseNDArray
from ._exception import (
SOMAError,
is_does_not_exist_error,
map_exception_for_create,
)
from ._funcs import typeguard_ignore
from ._soma_group import SOMAGroup
from ._soma_object import AnySOMAObject, SOMAObject
from ._sparse_nd_array import SparseNDArray
from ._types import OpenTimestamp
from ._util import (
is_relative_uri,
make_relative_path,
uri_joinpath,
)
from .options import SOMATileDBContext
from .options._soma_tiledb_context import _validate_soma_tiledb_context

Expand All @@ -60,25 +50,16 @@
_NDArr = TypeVar("_NDArr", bound=NDArray)


@attrs.define()
class _CachedElement:
"""Item we have loaded in the cache of a collection."""

entry: _tdb_handles.GroupEntry
soma: Optional[AnySOMAObject] = None
"""The reified object, if it has been opened."""


class CollectionBase( # type: ignore[misc] # __eq__ false positive
SOMAObject[_tdb_handles.SOMAGroupWrapper[Any]],
SOMAGroup[CollectionElementType],
somacore.collection.BaseCollection[CollectionElementType],
):
"""Contains a key-value mapping where the keys are string names and the values
are any SOMA-defined foundational or composed type, including :class:`Collection`,
:class:`DataFrame`, :class:`DenseNDArray`, :class:`SparseNDArray` or :class:`Experiment`.
"""

__slots__ = ("_contents", "_mutated_keys")
__slots__ = ()

# TODO: Implement additional creation of members on collection subclasses.
@classmethod
Expand Down Expand Up @@ -172,21 +153,6 @@ def open(
:class:`Measurement` for details.
"""

def __init__(
self,
handle: _tdb_handles.SOMAGroupWrapper[Any],
**kwargs: Any,
):
super().__init__(handle, **kwargs)
self._contents = {
key: _CachedElement(entry) for key, entry in handle.initial_contents.items()
}
"""The contents of the persisted TileDB Group.
This is loaded at startup when we have a read handle.
"""
self._mutated_keys: Set[str] = set()

# Overloads to allow type inference to work when doing:
#
# some_coll.add_new_collection("key") # -> Collection
Expand Down Expand Up @@ -433,123 +399,16 @@ def _add_new_element(
If set, the URI to use for the child
instead of the default.
"""
if key in self:
raise KeyError(f"{key!r} already exists in {type(self)}")
self._check_allows_child(key, kind)
child_uri = self._new_child_uri(key=key, user_uri=user_uri)
child = factory(child_uri.full_uri)
# The resulting element may not be the right type for this collection,
# but we can't really handle that within the type system.
self._set_element(
key,
uri=child_uri.add_uri,
relative=child_uri.relative,
soma_object=child, # type: ignore[arg-type]
return super()._add_new_element(
key, kind=kind, factory=factory, user_uri=user_uri
)
self._close_stack.enter_context(child)
return child

def __len__(self) -> int:
"""Return the number of members in the collection"""
return len(self._contents)

def __getitem__(self, key: str) -> CollectionElementType:
"""Gets the value associated with the key."""
err_str = f"{self.__class__.__name__} has no item {key!r}"

try:
entry = self._contents[key]
except KeyError:
raise KeyError(err_str) from None
if entry.soma is None:
from . import _factory # Delayed binding to resolve circular import.

uri = entry.entry.uri
mode = self.mode
context = self.context
timestamp = self.tiledb_timestamp_ms
clib_type = entry.entry.wrapper_type.clib_type

wrapper = _tdb_handles.open(uri, mode, context, timestamp, clib_type)
entry.soma = _factory.reify_handle(wrapper)

# Since we just opened this object, we own it and should close it.
self._close_stack.enter_context(entry.soma)
return cast(CollectionElementType, entry.soma)

def set(
self,
key: str,
value: CollectionElementType,
*,
use_relative_uri: Optional[bool] = None,
) -> Self:
"""Adds an element to the collection.
Args:
key:
The key of the element to be added.
value:
The value to be added to this collection.
use_relative_uri:
By default (None), the collection will determine whether the
element should be stored by relative URI.
If True, the collection will store the child by absolute URI.
If False, the collection will store the child by relative URI.
Raises:
SOMAError:
If an existing key is set (replacement is unsupported).
Lifecycle:
Maturing.
"""
uri_to_add = value.uri
# The SOMA API supports use_relative_uri in [True, False, None].
# The TileDB-Py API supports use_relative_uri in [True, False].
# Map from the former to the latter -- and also honor our somacore contract for None --
# using the following rule.
if use_relative_uri is None and value.uri.startswith("tiledb://"):
# TileDB-Cloud does not use relative URIs, ever.
use_relative_uri = False

if use_relative_uri is not False:
try:
uri_to_add = make_relative_path(value.uri, relative_to=self.uri)
use_relative_uri = True
except ValueError:
if use_relative_uri:
# We couldn't construct a relative URI, but we were asked
# to use one, so raise the error.
raise
use_relative_uri = False

self._set_element(
key, uri=uri_to_add, relative=use_relative_uri, soma_object=value
)
return self

def members(self) -> Dict[str, Tuple[str, str]]:
"""Get a mapping of {member_name: (uri, soma_object_type)}"""
handle = cast(_tdb_handles.SOMAGroupWrapper[Any], self._handle)
return handle.members()

def __setitem__(self, key: str, value: CollectionElementType) -> None:
"""Default collection __setattr__"""
self.set(key, value, use_relative_uri=None)

def __delitem__(self, key: str) -> None:
"""Removes a member from the collection, when invoked as ``del collection["namegoeshere"]``.
Raises:
SOMAError:
Upon deletion of a mutated key.
"""
self._del_element(key)

def __iter__(self) -> Iterator[str]:
return iter(self._contents)

def __repr__(self) -> str:
"""Default display for :class:`Collection`."""
lines = itertools.chain((self._my_repr(),), self._contents_lines(""))
Expand All @@ -572,20 +431,6 @@ def _my_repr(self) -> str:
count = f"{n} items"
return f"{start} ({count})"

def _contents_lines(self, last_indent: str) -> Iterable[str]:
indent = last_indent + " "
if self.closed:
return
for key, entry in self._contents.items():
obj = entry.soma
if obj is None:
# We haven't reified this SOMA object yet. Don't try to open it.
yield f"{indent}{key!r}: {entry.entry.uri!r} (unopened)"
else:
yield f"{indent}{key!r}: {obj._my_repr()}"
if isinstance(obj, CollectionBase):
yield from obj._contents_lines(indent)

def _set_element(
self,
key: str,
Expand All @@ -608,60 +453,7 @@ def _set_element(
"""

self._check_allows_child(key, type(soma_object))

if key in self._mutated_keys.union(self._contents):
# TileDB groups currently do not support replacing elements.
# If we use a hack to flush writes, corruption is possible.
raise SOMAError(f"replacing key {key!r} is unsupported")
clib_collection = self._handle._handle
relative_type = clib.URIType.relative if relative else clib.URIType.absolute
clib_collection.add(
uri=uri,
uri_type=relative_type,
name=key,
soma_type=clib_collection.type,
)
self._contents[key] = _CachedElement(
entry=_tdb_handles.GroupEntry(soma_object.uri, soma_object._wrapper_type),
soma=soma_object,
)
self._mutated_keys.add(key)

def _del_element(self, key: str) -> None:
if key in self._mutated_keys:
raise SOMAError(f"cannot delete previously-mutated key {key!r}")
try:
self._handle.writer.remove(key)
except RuntimeError as tdbe:
if is_does_not_exist_error(tdbe):
raise KeyError(tdbe) from tdbe
raise
self._contents.pop(key, None)
self._mutated_keys.add(key)

def _new_child_uri(self, *, key: str, user_uri: Optional[str]) -> "_ChildURI":
maybe_relative_uri = user_uri or _sanitize_for_path(key)
if not is_relative_uri(maybe_relative_uri):
# It's an absolute URI.
return _ChildURI(
add_uri=maybe_relative_uri,
full_uri=maybe_relative_uri,
relative=False,
)
if not self.uri.startswith("tiledb://"):
# We don't need to post-process anything.
return _ChildURI(
add_uri=maybe_relative_uri,
full_uri=uri_joinpath(self.uri, maybe_relative_uri),
relative=True,
)
# Our own URI is a ``tiledb://`` URI. Since TileDB Cloud requires absolute
# URIs, we need to calculate the absolute URI to pass to Group.add
# based on our creation URI.
# TODO: Handle the case where we reopen a TileDB Cloud Group, but by
# name rather than creation path.
absolute_uri = uri_joinpath(self.uri, maybe_relative_uri)
return _ChildURI(add_uri=absolute_uri, full_uri=absolute_uri, relative=False)
super()._set_element(key, uri=uri, relative=relative, soma_object=soma_object)

@classmethod
def _check_allows_child(cls, key: str, child_cls: type) -> None:
Expand Down Expand Up @@ -760,22 +552,3 @@ def _real_class(cls: Type[Any]) -> type:
except (AttributeError, TypeError) as exc:
raise err from exc
raise err


_NON_WORDS = re.compile(r"[\W_]+")


def _sanitize_for_path(key: str) -> str:
"""Prepares the given key for use as a path component."""
sanitized = "_".join(_NON_WORDS.split(key))
return sanitized


@attrs.define(frozen=True, kw_only=True)
class _ChildURI:
add_uri: str
"""The URI of the child for passing to :meth:``clib.SOMAGroup.add``."""
full_uri: str
"""The full URI of the child, used to create a new element."""
relative: bool
"""The ``relative`` value to pass to :meth:``clib.SOMAGroup.add``."""
Loading

0 comments on commit 7a67820

Please sign in to comment.