From 0b87a732121bf23ed26191e38dc65b40e68a1e62 Mon Sep 17 00:00:00 2001 From: nguyenv Date: Tue, 15 Oct 2024 19:46:45 -0500 Subject: [PATCH] [python] Various tiledb-py removals from pytest (#3170) * `test_basic_anndata_io.py` - `test_null_obs`: Use arrow schema to check nullability - `test_export_obsm_with_holes`: Use `Experiment.metadata` directly * `test_dataframe.py::test_enum_schema_report`: Remove redundant type checking against tiledb-py. Checks are already encompassed by Arrow schema check below * `test_dense_nd_array.py::test_read_to_unwritten_array`: create the expected output by building a NumPy array filled with the fill value rather than pulling it from a tiledb-py read query --- apis/python/tests/test_basic_anndata_io.py | 84 +++++++++---------- apis/python/tests/test_dataframe.py | 20 ----- .../tests/test_dataframe_index_columns.py | 2 +- apis/python/tests/test_dense_nd_array.py | 7 +- apis/python/tests/test_experiment_query.py | 12 +-- 5 files changed, 49 insertions(+), 76 deletions(-) diff --git a/apis/python/tests/test_basic_anndata_io.py b/apis/python/tests/test_basic_anndata_io.py index 7563f714ae..22319fe8b2 100644 --- a/apis/python/tests/test_basic_anndata_io.py +++ b/apis/python/tests/test_basic_anndata_io.py @@ -18,7 +18,6 @@ from tiledbsoma import Experiment, _constants, _factory from tiledbsoma._soma_object import SOMAObject from tiledbsoma.io._common import _TILEDBSOMA_TYPE, UnsDict, UnsMapping -import tiledb from ._util import TESTDATA, assert_adata_equal, make_pd_df @@ -760,24 +759,24 @@ def test_null_obs(conftest_pbmc_small, tmp_path: Path): ) assert_adata_equal(original, conftest_pbmc_small) - exp = tiledbsoma.Experiment.open(uri) - with tiledb.open(exp.obs.uri, "r") as obs: - # Explicitly check columns created above - assert obs.attr("empty_categorical_all").isnullable - assert obs.attr("empty_categorical_partial").isnullable - assert obs.attr("empty_extension_all").isnullable - assert obs.attr("empty_extension_partial").isnullable - # For every 
column in the data frame - # ensure that `isnullable` reflects the null-ness - # of the Pandas data frame + with tiledbsoma.Experiment.open(uri) as exp: + schema = exp.obs.schema.field + + # Explicitly check columns created above + assert schema("empty_categorical_all").nullable + assert schema("empty_categorical_partial").nullable + assert schema("empty_extension_all").nullable + assert schema("empty_extension_partial").nullable + + # For every column in the data frame ensure that `nullable` reflects + # the null-ness of the Pandas data frame for k in conftest_pbmc_small.obs: - assert obs.attr(k).isnullable + assert schema(k).nullable def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path): adata = anndata.read_h5ad(h5ad_file_with_obsm_holes.as_posix()) original = adata.copy() - assert 1 == 1 # This data file is prepared such that obsm["X_pca"] has shape (2638, 50) # but its [0][0] element is a 0, so when it's stored as sparse, its nnz @@ -790,48 +789,47 @@ def test_export_obsm_with_holes(h5ad_file_with_obsm_holes, tmp_path): assert_adata_equal(original, adata) - exp = tiledbsoma.Experiment.open(output_path) - # Verify the bounding box on the SOMA SparseNDArray - with tiledb.open(exp.ms["RNA"].obsm["X_pca"].uri) as so: - assert so.meta["soma_dim_0_domain_lower"] == 0 - assert so.meta["soma_dim_0_domain_upper"] == 2637 - assert so.meta["soma_dim_1_domain_lower"] == 0 - assert so.meta["soma_dim_1_domain_upper"] == 49 + with tiledbsoma.Experiment.open(output_path) as exp: + meta = exp.ms["RNA"].obsm["X_pca"].metadata + assert meta["soma_dim_0_domain_lower"] == 0 + assert meta["soma_dim_0_domain_upper"] == 2637 + assert meta["soma_dim_1_domain_lower"] == 0 + assert meta["soma_dim_1_domain_upper"] == 49 - # With the bounding box present, all is well for outgest to AnnData format. - try1 = tiledbsoma.io.to_anndata(exp, "RNA") - assert try1.obsm["X_pca"].shape == (2638, 50) + # With the bounding box present, all is well for outgest to AnnData format. 
+ try1 = tiledbsoma.io.to_anndata(exp, "RNA") + assert try1.obsm["X_pca"].shape == (2638, 50) # Now remove the bounding box to simulate reading older data that lacks a bounding box. - with tiledb.open(exp.ms["RNA"].obsm["X_pca"].uri, "w") as so: - del so.meta["soma_dim_0_domain_lower"] - del so.meta["soma_dim_0_domain_upper"] - del so.meta["soma_dim_1_domain_lower"] - del so.meta["soma_dim_1_domain_upper"] + with tiledbsoma.Experiment.open(output_path, "w") as exp: + meta = exp.ms["RNA"].obsm["X_pca"].metadata + del meta["soma_dim_0_domain_lower"] + del meta["soma_dim_0_domain_upper"] + del meta["soma_dim_1_domain_lower"] + del meta["soma_dim_1_domain_upper"] # Re-open to simulate opening afresh a bounding-box-free array. - exp = tiledbsoma.Experiment.open(output_path) - - with tiledb.open(exp.ms["RNA"].obsm["X_pca"].uri) as so: + with tiledbsoma.Experiment.open(output_path) as exp: + meta = exp.ms["RNA"].obsm["X_pca"].metadata with pytest.raises(KeyError): - so.meta["soma_dim_0_domain_lower"] + meta["soma_dim_0_domain_lower"] with pytest.raises(KeyError): - so.meta["soma_dim_0_domain_upper"] + meta["soma_dim_0_domain_upper"] with pytest.raises(KeyError): - so.meta["soma_dim_1_domain_lower"] + meta["soma_dim_1_domain_lower"] with pytest.raises(KeyError): - so.meta["soma_dim_1_domain_upper"] - assert so.meta["soma_object_type"] == "SOMASparseNDArray" + meta["soma_dim_1_domain_upper"] + assert meta["soma_object_type"] == "SOMASparseNDArray" - # Now try the remaining options for outgest. - with pytest.raises(tiledbsoma.SOMAError): - tiledbsoma.io.to_anndata(exp, "RNA") + # Now try the remaining options for outgest. 
+ with pytest.raises(tiledbsoma.SOMAError): + tiledbsoma.io.to_anndata(exp, "RNA") - try3 = tiledbsoma.io.to_anndata( - exp, "RNA", obsm_varm_width_hints={"obsm": {"X_pca": 50}} - ) - assert try3.obsm["X_pca"].shape == (2638, 50) + try3 = tiledbsoma.io.to_anndata( + exp, "RNA", obsm_varm_width_hints={"obsm": {"X_pca": 50}} + ) + assert try3.obsm["X_pca"].shape == (2638, 50) def test_X_empty(h5ad_file_X_empty): diff --git a/apis/python/tests/test_dataframe.py b/apis/python/tests/test_dataframe.py index 7882993792..c876b1183b 100644 --- a/apis/python/tests/test_dataframe.py +++ b/apis/python/tests/test_dataframe.py @@ -12,7 +12,6 @@ from pandas.api.types import union_categoricals import tiledbsoma as soma -import tiledb from tests._util import raises_no_typeguard @@ -1593,25 +1592,6 @@ def test_enum_schema_report(tmp_path): arrow_table = pa.Table.from_pandas(pandas_df, preserve_index=False) sdf.write(arrow_table) - # Double-check against TileDB-Py reporting - with tiledb.open(uri) as A: - for i in range(A.schema.nattr): - attr = A.schema.attr(i) - try: - index_type = attr.dtype - value_type = A.enum(attr.name).dtype - except tiledb.cc.TileDBError: - pass # not an enum attr - if attr.name == "int_cat": - assert index_type.name == "int8" - assert value_type.name == "int64" - elif attr.name == "str_cat": - assert index_type.name == "int8" - assert value_type.name == "str32" - elif attr.name == "byte_cat": - assert index_type.name == "int8" - assert value_type.name == "bytes8" - # Verify SOMA Arrow schema with soma.open(uri) as sdf: f = sdf.schema.field("int_cat") diff --git a/apis/python/tests/test_dataframe_index_columns.py b/apis/python/tests/test_dataframe_index_columns.py index 490039230a..21ec7df276 100644 --- a/apis/python/tests/test_dataframe_index_columns.py +++ b/apis/python/tests/test_dataframe_index_columns.py @@ -1898,6 +1898,6 @@ def test_types_read_errors( with soma.DataFrame.open(uri, "w") as sdf: sdf.write(arrow_table) - with 
pytest.raises((soma.SOMAError)): + with pytest.raises(soma.SOMAError): with soma.DataFrame.open(uri, "r") as sdf: sdf.read(coords=coords).concat() diff --git a/apis/python/tests/test_dense_nd_array.py b/apis/python/tests/test_dense_nd_array.py index 47feb19c38..785d8f97c7 100644 --- a/apis/python/tests/test_dense_nd_array.py +++ b/apis/python/tests/test_dense_nd_array.py @@ -494,10 +494,5 @@ def test_read_to_unwritten_array(tmp_path, shape): soma.DenseNDArray.create(uri, type=pa.uint8(), shape=shape) - with tiledb.open(uri, "r") as A: - expected = A[:]["soma_data"] - with soma.DenseNDArray.open(uri, "r") as A: - actual = A.read().to_numpy() - - assert np.array_equal(expected, actual) + assert np.array_equal(np.ones(shape) * 255, A.read().to_numpy()) diff --git a/apis/python/tests/test_experiment_query.py b/apis/python/tests/test_experiment_query.py index 1897a273f3..12ede30a12 100644 --- a/apis/python/tests/test_experiment_query.py +++ b/apis/python/tests/test_experiment_query.py @@ -1,3 +1,4 @@ +import re from concurrent import futures from contextlib import nullcontext from typing import Tuple @@ -15,7 +16,6 @@ from tiledbsoma import SOMATileDBContext, _factory from tiledbsoma._collection import CollectionBase from tiledbsoma.experiment_query import X_as_series -import tiledb from tests._util import raises_no_typeguard @@ -950,11 +950,11 @@ def test_empty_categorical_query(conftest_pbmc_small_exp): measurement_name="RNA", obs_query=AxisQuery(value_filter='groups == "foo"') ) # Empty query on a categorical column raised ArrowInvalid before TileDB 2.21; see https://github.com/single-cell-data/TileDB-SOMA/pull/2299 - ctx = ( - nullcontext() - if tiledb.libtiledb.version() >= (2, 21) - else pytest.raises(ArrowInvalid) - ) + m = re.fullmatch(r"libtiledb=(\d+\.\d+\.\d+)", soma.pytiledbsoma.version()) + version = m.group(1).split(".") + major, minor = int(version[0]), int(version[1]) + + ctx = nullcontext() if (major, minor) >= (2, 21) else pytest.raises(ArrowInvalid) 
with ctx: obs = q.obs().concat() assert len(obs) == 0