-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This extracts the public interface of `tiledbsoma.query.ExperimentAxisQuery` and puts it here. Because there is no TileDB-specific behavior in `ExperimentAxisQuery`, I believe we can actually pull the entire implementation here (it only uses the properties of Experiment itself) but that is for a later date.
- Loading branch information
1 parent
fe5abfb
commit 12b033e
Showing
7 changed files
with
278 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
from typing import Any, Optional, Sequence, Tuple | ||
|
||
import attrs | ||
import numpy as np | ||
import pyarrow as pa | ||
from typing_extensions import TypeGuard | ||
|
||
from somacore import options | ||
|
||
|
||
def _canonicalize_coords(
    in_coords: options.SparseDFCoords,
) -> Tuple[options.SparseDFCoord, ...]:
    """Validates coordinates and freezes sequences as tuples.

    This is not strictly necessary; DataFrame will report these errors
    eventually but doing it now makes for better UX.

    :param in_coords: The per-dimension coordinates supplied by the caller.
    :return: A tuple holding one canonicalized coordinate per input entry.
    :raises TypeError: If ``in_coords`` is not a ``Sequence``, is a ``str``
        or ``bytes``, or contains an entry that ``_canonicalize_coord``
        rejects.
    """
    if not _is_normal_sequence(in_coords):
        raise TypeError(
            "query coordinates must be a normal sequence, not `str` or `bytes`."
        )
    return tuple(_canonicalize_coord(c) for c in in_coords)
|
||
|
||
def _canonicalize_coord(coord: options.SparseDFCoord) -> options.SparseDFCoord:
    """Validates a single coordinate, freezing mutable sequences.

    :param coord: The coordinate for one dimension.
    :return: ``coord`` itself when it is ``None`` or an ``int``, ``slice``,
        ``pa.Array``, ``pa.ChunkedArray`` or ``np.ndarray``; otherwise a
        ``tuple`` built from the sequence.
    :raises TypeError: If ``coord`` is none of the above; this includes
        ``str`` and ``bytes``.
    """
    if coord is None or isinstance(
        coord, (int, slice, pa.Array, pa.ChunkedArray, np.ndarray)
    ):
        return coord
    if _is_normal_sequence(coord):
        # We're trusting here that the elements of the user's sequence are
        # appropriate. If this is not the case, it will raise down the line.
        return tuple(coord)
    raise TypeError(f"{type(coord)} object cannot be used as a coordinate.")
|
||
|
||
def _is_normal_sequence(it: Any) -> TypeGuard[Sequence]: | ||
return not isinstance(it, (str, bytes)) and isinstance(it, Sequence) | ||
|
||
|
||
@attrs.define(frozen=True, kw_only=True)
class AxisQuery:
    """Single-axis dataframe query with coordinates and a value filter.

    [lifecycle: experimental]

    Per dimension, the AxisQuery can have value of:

    * None - all data
    * Coordinates - a set of coordinates on the axis dataframe index,
      expressed in any type or format supported by ``DataFrame.read()``.
    * A SOMA ``value_filter`` across columns in the axis dataframe,
      expressed as string
    * Or, a combination of coordinates and value filter.

    ``coords`` always holds one entry per dimension, so a single list of
    points must itself be wrapped in a one-element tuple.

    Examples::

        AxisQuery()  # all data
        AxisQuery(coords=(slice(1,10),))  # 1D, slice
        AxisQuery(coords=([0,1,2],))  # 1D, point indexing using array-like
        AxisQuery(coords=(slice(None), numpy.array([0,88,1001])))  # 2D
        AxisQuery(value_filter="tissue == 'lung'")
        AxisQuery(coords=(slice(1,None),), value_filter="tissue == 'lung'")
    """

    value_filter: Optional[str] = attrs.field(
        default=None,
        validator=attrs.validators.optional(attrs.validators.instance_of(str)),
    )
    """A string specifying a SOMA ``value_filter``."""
    coords: Tuple[options.SparseDFCoord, ...] = attrs.field(
        default=(slice(None),),
        converter=_canonicalize_coords,
    )
    """Query (slice) by dimension.

    The tuple must have a length less than or equal to the number of
    dimensions, and be of a type supported by ``DataFrame``.
    """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
import abc | ||
import contextlib | ||
from typing import Any, Optional, Sequence | ||
|
||
import anndata | ||
import pyarrow as pa | ||
from typing_extensions import TypedDict | ||
|
||
from somacore import data | ||
|
||
|
||
class AxisColumnNames(TypedDict, total=False):
    """Specifies column names for experiment axis query read operations.

    Both keys are optional (``total=False``).
    """

    obs: Optional[Sequence[str]]
    """obs columns to use. All columns if ``None`` or not present."""
    var: Optional[Sequence[str]]
    """var columns to use. All columns if ``None`` or not present."""
|
||
|
||
class ExperimentAxisQuery(contextlib.AbstractContextManager, metaclass=abc.ABCMeta):
    """Abstract interface for a query over the ``obs`` and ``var`` axes.

    Implementations supply the read methods and ``close``. This base class
    provides ``n_obs`` and ``n_vars`` (derived from the joinid methods) and
    calls ``close`` both when a ``with`` block exits and when the object is
    finalized.
    """

    @abc.abstractmethod
    def obs(
        self, *, column_names: Optional[Sequence[str]] = None
    ) -> data.ReadIter[pa.Table]:
        """Returns ``obs`` as an Arrow table iterator.

        :param column_names: The ``obs`` columns to read.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def var(
        self, *, column_names: Optional[Sequence[str]] = None
    ) -> data.ReadIter[pa.Table]:
        """Returns ``var`` as an Arrow table iterator.

        :param column_names: The ``var`` columns to read.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def obs_joinids(self) -> pa.Array:
        """Returns ``obs`` ``soma_joinids`` as an Arrow array."""
        raise NotImplementedError()

    @abc.abstractmethod
    def var_joinids(self) -> pa.Array:
        """Returns ``var`` ``soma_joinids`` as an Arrow array."""
        raise NotImplementedError()

    @property
    def n_obs(self) -> int:
        """The number of ``obs`` axis query results."""
        return len(self.obs_joinids())

    @property
    def n_vars(self) -> int:
        """The number of ``var`` axis query results."""
        return len(self.var_joinids())

    @abc.abstractmethod
    def X(self, layer_name: str) -> data.SparseRead:
        """Returns an ``X`` layer as ``SparseRead`` data.

        :param layer_name: The X layer name to return.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def obsp(self, layer: str) -> data.SparseRead:
        """Returns an ``obsp`` layer as ``SparseRead`` data.

        :param layer: The ``obsp`` layer name to return.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def varp(self, layer: str) -> data.SparseRead:
        """Returns a ``varp`` layer as ``SparseRead`` data.

        :param layer: The ``varp`` layer name to return.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def to_anndata(
        self,
        X_name: str,
        *,
        column_names: Optional[AxisColumnNames] = None,
        X_layers: Sequence[str] = (),
    ) -> anndata.AnnData:
        """
        Execute the query and return result as an ``AnnData`` in-memory object.

        :param X_name: The name of the X layer to read and return
            in the ``X`` slot.
        :param column_names: The columns in the ``var`` and ``obs`` dataframes
            to read.
        :param X_layers: Additional X layers to read and return
            in the ``layers`` slot.
        """
        raise NotImplementedError()

    # Context management

    @abc.abstractmethod
    def close(self) -> None:
        """Releases resources associated with this query.

        This method must be idempotent.
        """
        raise NotImplementedError()

    def __exit__(self, *_: Any) -> None:
        """Closes the query on leaving a ``with`` block.

        Returns ``None``, so an exception raised inside the block is not
        suppressed.
        """
        self.close()

    def __del__(self) -> None:
        """Ensure that we're closed when our last ref disappears."""
        # If any superclass in our MRO has a __del__, call it.
        sdel = getattr(super(), "__del__", lambda: None)
        sdel()
        self.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from typing import Tuple | ||
|
||
import numpy as np | ||
import pytest | ||
from pytest import mark | ||
|
||
from somacore import options | ||
from somacore.query import axis | ||
|
||
|
||
@mark.parametrize(
    ["coords", "want"],
    [
        ((slice(1, 10),), (slice(1, 10),)),
        ([0, 1, 2], (0, 1, 2)),
        ((slice(None), [0, 88, 1001]), (slice(None), (0, 88, 1001))),
        # The xfail cases pin the expected failure mode (TypeError) and are
        # strict, so an unrelated error or an unexpected pass is reported
        # instead of being silently tolerated.
        pytest.param(
            ("string-coord",),
            ("string-coord",),
            marks=mark.xfail(
                reason="strings not supported yet",
                raises=TypeError,
                strict=True,
            ),
        ),
        pytest.param(
            (b"bytes-coord",),
            (b"bytes-coord",),
            marks=mark.xfail(
                reason="bytes not supported yet",
                raises=TypeError,
                strict=True,
            ),
        ),
    ],
)
def test_canonicalization(
    coords: options.SparseDFCoords, want: Tuple[options.SparseDFCoord, ...]
) -> None:
    """Checks that ``AxisQuery`` stores ``coords`` in canonical tuple form."""
    axq = axis.AxisQuery(coords=coords)
    assert want == axq.coords
|
||
|
||
def test_canonicalization_nparray() -> None:
    """Checks that a NumPy array coordinate is kept as an array."""
    axq = axis.AxisQuery(coords=(1, np.array([1, 2, 3])))

    one, arr = axq.coords
    assert one == 1
    # The element-wise comparison below would also pass for a tuple, so
    # check explicitly that the array was not frozen into one.
    assert isinstance(arr, np.ndarray)
    assert (np.array([1, 2, 3]) == arr).all()
|
||
|
||
@mark.parametrize(
    ["coords"],
    [
        ("forbid bare strings",),
        (b"forbid bare byteses",),
        ([1, 1.5, 2],),
    ],
)
def test_canonicalization_bad(coords) -> None:
    """Checks that invalid ``coords`` values are rejected with ``TypeError``.

    ``coords`` is left unannotated because the inputs are deliberately of
    the wrong type.
    """
    with pytest.raises(TypeError):
        axis.AxisQuery(coords=coords)