Skip to content

Commit

Permalink
[python] Various tiledb-py removals from pytest (#3170)
Browse files Browse the repository at this point in the history
* `test_basic_anndata_io.py`
  - `test_null_obs`: Use arrow schema to check nullability
  - `test_export_obsm_with_holes`: Use `Experiment.metadata` directly
* `test_dataframe.py::test_enum_schema_report`: Remove redundant
   type checking against tiledb-py. Checks are already encompassed
   by Arrow schema check below
* `test_dense_nd_array.py::test_read_to_unwritten_array`: create the expected
  data output by creating a numpy array with the fill value rather than
  pulling it from a tiledb-py read query
  • Loading branch information
nguyenv authored Oct 16, 2024
1 parent 4b3f507 commit 0b87a73
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 76 deletions.
84 changes: 41 additions & 43 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from tiledbsoma import Experiment, _constants, _factory
from tiledbsoma._soma_object import SOMAObject
from tiledbsoma.io._common import _TILEDBSOMA_TYPE, UnsDict, UnsMapping
import tiledb

from ._util import TESTDATA, assert_adata_equal, make_pd_df

Expand Down Expand Up @@ -760,24 +759,24 @@ def test_null_obs(conftest_pbmc_small, tmp_path: Path):
)
assert_adata_equal(original, conftest_pbmc_small)

exp = tiledbsoma.Experiment.open(uri)
with tiledb.open(exp.obs.uri, "r") as obs:
# Explicitly check columns created above
assert obs.attr("empty_categorical_all").isnullable
assert obs.attr("empty_categorical_partial").isnullable
assert obs.attr("empty_extension_all").isnullable
assert obs.attr("empty_extension_partial").isnullable
# For every column in the data frame
# ensure that `isnullable` reflects the null-ness
# of the Pandas data frame
with tiledbsoma.Experiment.open(uri) as exp:
schema = exp.obs.schema.field

# Explicitly check columns created above
assert schema("empty_categorical_all").nullable
assert schema("empty_categorical_partial").nullable
assert schema("empty_extension_all").nullable
assert schema("empty_extension_partial").nullable

# For every column in the data frame ensure that `nullable` reflects
# the null-ness of the Pandas data frame
for k in conftest_pbmc_small.obs:
assert obs.attr(k).isnullable
assert schema(k).nullable


def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path):
adata = anndata.read_h5ad(h5ad_file_with_obsm_holes.as_posix())
original = adata.copy()
assert 1 == 1

# This data file is prepared such that obsm["X_pca"] has shape (2638, 50)
# but its [0][0] element is a 0, so when it's stored as sparse, its nnz
Expand All @@ -790,48 +789,47 @@ def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path):

assert_adata_equal(original, adata)

exp = tiledbsoma.Experiment.open(output_path)

# Verify the bounding box on the SOMA SparseNDArray
with tiledb.open(exp.ms["RNA"].obsm["X_pca"].uri) as so:
assert so.meta["soma_dim_0_domain_lower"] == 0
assert so.meta["soma_dim_0_domain_upper"] == 2637
assert so.meta["soma_dim_1_domain_lower"] == 0
assert so.meta["soma_dim_1_domain_upper"] == 49
with tiledbsoma.Experiment.open(output_path) as exp:
meta = exp.ms["RNA"].obsm["X_pca"].metadata
assert meta["soma_dim_0_domain_lower"] == 0
assert meta["soma_dim_0_domain_upper"] == 2637
assert meta["soma_dim_1_domain_lower"] == 0
assert meta["soma_dim_1_domain_upper"] == 49

# With the bounding box present, all is well for outgest to AnnData format.
try1 = tiledbsoma.io.to_anndata(exp, "RNA")
assert try1.obsm["X_pca"].shape == (2638, 50)
# With the bounding box present, all is well for outgest to AnnData format.
try1 = tiledbsoma.io.to_anndata(exp, "RNA")
assert try1.obsm["X_pca"].shape == (2638, 50)

# Now remove the bounding box to simulate reading older data that lacks a bounding box.
with tiledb.open(exp.ms["RNA"].obsm["X_pca"].uri, "w") as so:
del so.meta["soma_dim_0_domain_lower"]
del so.meta["soma_dim_0_domain_upper"]
del so.meta["soma_dim_1_domain_lower"]
del so.meta["soma_dim_1_domain_upper"]
with tiledbsoma.Experiment.open(output_path, "w") as exp:
meta = exp.ms["RNA"].obsm["X_pca"].metadata
del meta["soma_dim_0_domain_lower"]
del meta["soma_dim_0_domain_upper"]
del meta["soma_dim_1_domain_lower"]
del meta["soma_dim_1_domain_upper"]

# Re-open to simulate opening afresh a bounding-box-free array.
exp = tiledbsoma.Experiment.open(output_path)

with tiledb.open(exp.ms["RNA"].obsm["X_pca"].uri) as so:
with tiledbsoma.Experiment.open(output_path) as exp:
meta = exp.ms["RNA"].obsm["X_pca"].metadata
with pytest.raises(KeyError):
so.meta["soma_dim_0_domain_lower"]
meta["soma_dim_0_domain_lower"]
with pytest.raises(KeyError):
so.meta["soma_dim_0_domain_upper"]
meta["soma_dim_0_domain_upper"]
with pytest.raises(KeyError):
so.meta["soma_dim_1_domain_lower"]
meta["soma_dim_1_domain_lower"]
with pytest.raises(KeyError):
so.meta["soma_dim_1_domain_upper"]
assert so.meta["soma_object_type"] == "SOMASparseNDArray"
meta["soma_dim_1_domain_upper"]
assert meta["soma_object_type"] == "SOMASparseNDArray"

# Now try the remaining options for outgest.
with pytest.raises(tiledbsoma.SOMAError):
tiledbsoma.io.to_anndata(exp, "RNA")
# Now try the remaining options for outgest.
with pytest.raises(tiledbsoma.SOMAError):
tiledbsoma.io.to_anndata(exp, "RNA")

try3 = tiledbsoma.io.to_anndata(
exp, "RNA", obsm_varm_width_hints={"obsm": {"X_pca": 50}}
)
assert try3.obsm["X_pca"].shape == (2638, 50)
try3 = tiledbsoma.io.to_anndata(
exp, "RNA", obsm_varm_width_hints={"obsm": {"X_pca": 50}}
)
assert try3.obsm["X_pca"].shape == (2638, 50)


def test_X_empty(h5ad_file_X_empty):
Expand Down
20 changes: 0 additions & 20 deletions apis/python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from pandas.api.types import union_categoricals

import tiledbsoma as soma
import tiledb

from tests._util import raises_no_typeguard

Expand Down Expand Up @@ -1593,25 +1592,6 @@ def test_enum_schema_report(tmp_path):
arrow_table = pa.Table.from_pandas(pandas_df, preserve_index=False)
sdf.write(arrow_table)

# Double-check against TileDB-Py reporting
with tiledb.open(uri) as A:
for i in range(A.schema.nattr):
attr = A.schema.attr(i)
try:
index_type = attr.dtype
value_type = A.enum(attr.name).dtype
except tiledb.cc.TileDBError:
pass # not an enum attr
if attr.name == "int_cat":
assert index_type.name == "int8"
assert value_type.name == "int64"
elif attr.name == "str_cat":
assert index_type.name == "int8"
assert value_type.name == "str32"
elif attr.name == "byte_cat":
assert index_type.name == "int8"
assert value_type.name == "bytes8"

# Verify SOMA Arrow schema
with soma.open(uri) as sdf:
f = sdf.schema.field("int_cat")
Expand Down
2 changes: 1 addition & 1 deletion apis/python/tests/test_dataframe_index_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -1898,6 +1898,6 @@ def test_types_read_errors(
with soma.DataFrame.open(uri, "w") as sdf:
sdf.write(arrow_table)

with pytest.raises((soma.SOMAError)):
with pytest.raises(soma.SOMAError):
with soma.DataFrame.open(uri, "r") as sdf:
sdf.read(coords=coords).concat()
7 changes: 1 addition & 6 deletions apis/python/tests/test_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,10 +494,5 @@ def test_read_to_unwritten_array(tmp_path, shape):

soma.DenseNDArray.create(uri, type=pa.uint8(), shape=shape)

with tiledb.open(uri, "r") as A:
expected = A[:]["soma_data"]

with soma.DenseNDArray.open(uri, "r") as A:
actual = A.read().to_numpy()

assert np.array_equal(expected, actual)
assert np.array_equal(np.ones(shape) * 255, A.read().to_numpy())
12 changes: 6 additions & 6 deletions apis/python/tests/test_experiment_query.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from concurrent import futures
from contextlib import nullcontext
from typing import Tuple
Expand All @@ -15,7 +16,6 @@
from tiledbsoma import SOMATileDBContext, _factory
from tiledbsoma._collection import CollectionBase
from tiledbsoma.experiment_query import X_as_series
import tiledb

from tests._util import raises_no_typeguard

Expand Down Expand Up @@ -950,11 +950,11 @@ def test_empty_categorical_query(conftest_pbmc_small_exp):
measurement_name="RNA", obs_query=AxisQuery(value_filter='groups == "foo"')
)
# Empty query on a categorical column raised ArrowInvalid before TileDB 2.21; see https://github.com/single-cell-data/TileDB-SOMA/pull/2299
ctx = (
nullcontext()
if tiledb.libtiledb.version() >= (2, 21)
else pytest.raises(ArrowInvalid)
)
m = re.fullmatch(r"libtiledb=(\d+\.\d+\.\d+)", soma.pytiledbsoma.version())
version = m.group(1).split(".")
major, minor = int(version[0]), int(version[1])

ctx = nullcontext() if (major, minor) >= (2, 21) else pytest.raises(ArrowInvalid)
with ctx:
obs = q.obs().concat()
assert len(obs) == 0

0 comments on commit 0b87a73

Please sign in to comment.