Skip to content

Commit

Permalink
[c++/python] Updates for core 2.27 (#3178)
Browse files Browse the repository at this point in the history
* Expose `embedded_version_triple` to `pybind11` layer

* fix up us-east-1 mod in pytest

* unit_soma_dense_ndarray.cc

* move have_dense_current_domain_support to test/common.cc

* unit_soma_collection.cc

* 2.26 testing

* [python] Support for current domain on dense arrays (#3179)

Co-authored-by: nguyenv <vivian@tiledb.com>

---------

Co-authored-by: nguyenv <vivian@tiledb.com>
  • Loading branch information
johnkerl and nguyenv authored Oct 16, 2024
1 parent 566d889 commit 22aa800
Show file tree
Hide file tree
Showing 9 changed files with 194 additions and 109 deletions.
52 changes: 41 additions & 11 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from ._arrow_types import pyarrow_to_carrow_type
from ._common_nd_array import NDArray
from ._exception import SOMAError, map_exception_for_create
from ._flags import NEW_SHAPE_FEATURE_FLAG_ENABLED
from ._tdb_handles import DenseNDArrayWrapper
from ._types import OpenTimestamp, Slice
from ._util import dense_indices_to_shape
Expand Down Expand Up @@ -103,15 +104,43 @@ def create(
for dim_idx, dim_shape in enumerate(shape):
dim_name = f"soma_dim_{dim_idx}"
pa_field = pa.field(dim_name, pa.int64())
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
dim_shape,
TileDBCreateOptions.from_platform_config(platform_config),
)
index_column_schema.append(pa_field)
# TODO: support current domain for dense arrays once we have core support.
# https://github.com/single-cell-data/TileDB-SOMA/issues/2955

if NEW_SHAPE_FEATURE_FLAG_ENABLED and clib.embedded_version_triple() >= (
2,
27,
0,
):
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
# The user specifies current domain -- this is the max domain
# which is taken from the max ranges for the dim datatype.
# We pass None here to detect those.
None,
TileDBCreateOptions.from_platform_config(platform_config),
)

if dim_shape == 0:
raise ValueError("DenseNDArray shape slots must be at least 1")
if dim_shape is None:
dim_shape = dim_capacity

index_column_data[pa_field.name] = [
0,
dim_capacity - 1,
dim_extent,
0,
dim_shape - 1,
]

else:
dim_capacity, dim_extent = cls._dim_capacity_and_extent(
dim_name,
dim_shape,
TileDBCreateOptions.from_platform_config(platform_config),
)

index_column_data[pa_field.name] = [0, dim_capacity - 1, dim_extent]
index_column_schema.append(pa_field)

index_column_info = pa.RecordBatch.from_pydict(
index_column_data, schema=pa.schema(index_column_schema)
Expand Down Expand Up @@ -309,9 +338,10 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None:
"""Supported for ``SparseNDArray``; scheduled for implementation for
``DenseNDArray`` in TileDB-SOMA 1.15
"""
# TODO: support current domain for dense arrays once we have core support.
# https://github.com/single-cell-data/TileDB-SOMA/issues/2955
raise NotImplementedError()
if clib.embedded_version_triple() >= (2, 27, 0):
self._handle.resize(newshape)
else:
raise NotImplementedError("Not implemented for libtiledbsoma < 2.27.0")

@classmethod
def _dim_capacity_and_extent(
Expand Down
14 changes: 8 additions & 6 deletions apis/python/src/tiledbsoma/_tdb_handles.py
Original file line number Diff line number Diff line change
Expand Up @@ -619,19 +619,21 @@ def resize(self, newshape: Sequence[Union[int, None]]) -> None:
"""Supported for ``SparseNDArray``; scheduled for implementation for
``DenseNDArray`` in TileDB-SOMA 1.15
"""
# TODO: support current domain for dense arrays once we have core support.
# https://github.com/single-cell-data/TileDB-SOMA/issues/2955
raise NotImplementedError()
if clib.embedded_version_triple() >= (2, 27, 0):
self._handle.resize(newshape)
else:
raise NotImplementedError("Not implemented for libtiledbsoma < 2.27.0")

def tiledbsoma_can_resize(
self, newshape: Sequence[Union[int, None]]
) -> StatusAndReason:
"""Supported for ``SparseNDArray``; scheduled for implementation for
``DenseNDArray`` in TileDB-SOMA 1.15.
"""
# TODO: support current domain for dense arrays once we have core support.
# https://github.com/single-cell-data/TileDB-SOMA/issues/2955
raise NotImplementedError()
if clib.embedded_version_triple() >= (2, 27, 0):
return cast(StatusAndReason, self._handle.tiledbsoma_can_resize(newshape))
else:
raise NotImplementedError("Not implemented for libtiledbsoma < 2.27.0")


class SparseNDArrayWrapper(SOMAArrayWrapper[clib.SOMASparseNDArray]):
Expand Down
3 changes: 3 additions & 0 deletions apis/python/src/tiledbsoma/pytiledbsoma.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ PYBIND11_MODULE(pytiledbsoma, m) {
m.doc() = "SOMA acceleration library";

m.def("version", []() { return tiledbsoma::version::as_string(); });
m.def("embedded_version_triple", []() {
return tiledbsoma::version::embedded_version_triple();
});

m.def(
"config_logging",
Expand Down
5 changes: 4 additions & 1 deletion apis/python/tests/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,10 @@ def test_replace_config_after_construction():

# verify defaults expected by subsequent tests
assert context.timestamp_ms is None
assert context.native_context.config()["vfs.s3.region"] == "us-east-1"
if tiledbsoma.pytiledbsoma.embedded_version_triple() < (2, 27, 0):
assert context.native_context.config()["vfs.s3.region"] == "us-east-1"
else:
assert context.native_context.config()["vfs.s3.region"] == ""

now = int(time.time() * 1000)
open_ts = context._open_timestamp_ms(None)
Expand Down
12 changes: 8 additions & 4 deletions apis/python/tests/test_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,6 @@ def test_sparse_nd_array_basics(
(ok, msg) = snda.resize(new_shape, check_only=True)


## Pending 2.27 timeframe for dense support for current domain, including resize
## https://github.com/single-cell-data/TileDB-SOMA/issues/2955
def test_dense_nd_array_basics(tmp_path):
uri = tmp_path.as_posix()
shape = (100, 200)
Expand All @@ -212,12 +210,18 @@ def test_dense_nd_array_basics(tmp_path):
assert dnda.non_empty_domain() == ((0, 0), (0, 0))

with tiledbsoma.DenseNDArray.open(uri, "w") as dnda:
with pytest.raises(NotImplementedError):
if tiledbsoma.pytiledbsoma.embedded_version_triple() >= (2, 27, 0):
dnda.resize((300, 400))
else:
with pytest.raises(NotImplementedError):
dnda.resize((300, 400))

with tiledbsoma.DenseNDArray.open(uri) as dnda:
assert dnda.non_empty_domain() == ((0, 0), (0, 0))
assert dnda.shape == (100, 200)
if tiledbsoma.pytiledbsoma.embedded_version_triple() >= (2, 27, 0):
assert dnda.shape == (300, 400)
else:
assert dnda.shape == (100, 200)


@pytest.mark.parametrize(
Expand Down
6 changes: 6 additions & 0 deletions libtiledbsoma/test/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ static std::unique_ptr<ArrowSchema> _create_index_cols_info_schema(
static std::unique_ptr<ArrowArray> _create_index_cols_info_array(
const std::vector<DimInfo>& dim_infos);

// Core PR: https://github.com/TileDB-Inc/TileDB/pull/5303
//
// Returns true when the embedded version triple reported by
// tiledbsoma::version::embedded_version_triple() is at least 2.27, which is
// the threshold these tests use to decide whether current-domain support for
// dense arrays can be exercised.
bool have_dense_current_domain_support() {
    const auto vers = tiledbsoma::version::embedded_version_triple();
    const auto major = std::get<0>(vers);
    const auto minor = std::get<1>(vers);
    // Compare (major, minor) lexicographically. Testing each component
    // independently (major >= 2 && minor >= 27) would wrongly reject any
    // later major release whose minor number is below 27, e.g. 3.0.
    return major > 2 || (major == 2 && minor >= 27);
}

// Notes:
//
// * This is multi-purpose code used for generic SOMASparseNDArray,
Expand Down
3 changes: 3 additions & 0 deletions libtiledbsoma/test/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,5 +87,8 @@ ArrowTable create_column_index_info(const std::vector<DimInfo>& dim_infos);

std::string to_arrow_format(tiledb_datatype_t tiledb_datatype);

// Core PR: https://github.com/TileDB-Inc/TileDB/pull/5303
//
// Test helper: reports whether the embedded version is new enough for the
// unit tests to exercise current-domain support on dense arrays. The version
// threshold itself is defined alongside the implementation in common.cc.
bool have_dense_current_domain_support();

} // namespace helper
#endif
104 changes: 57 additions & 47 deletions libtiledbsoma/test/unit_soma_collection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,55 +111,65 @@ TEST_CASE("SOMACollection: add SOMASparseNDArray") {
}

TEST_CASE("SOMACollection: add SOMADenseNDArray") {
TimestampRange ts(0, 2);
auto ctx = std::make_shared<SOMAContext>();
std::string base_uri = "mem://unit-test-add-dense-ndarray";
std::string sub_uri = "mem://unit-test-add-dense-ndarray/sub";
std::string dim_name = "soma_dim_0";
tiledb_datatype_t tiledb_datatype = TILEDB_INT64;
std::string arrow_format = ArrowAdapter::tdb_to_arrow_type(tiledb_datatype);

SOMACollection::create(base_uri, ctx, ts);
// TODO: add support for current domain in dense arrays once we have that
// support from core
// https://github.com/single-cell-data/TileDB-SOMA/issues/2955
std::vector<helper::DimInfo> dim_infos(
{{.name = dim_name,
.tiledb_datatype = tiledb_datatype,
.dim_max = DIM_MAX,
.string_lo = "N/A",
.string_hi = "N/A",
.use_current_domain = false}});
auto index_columns = helper::create_column_index_info(dim_infos);

std::map<std::string, SOMAGroupEntry> expected_map{
{"dense_ndarray", SOMAGroupEntry(sub_uri, "SOMAArray")}};

auto soma_collection = SOMACollection::open(
base_uri, OpenMode::write, ctx, ts);
REQUIRE(soma_collection->timestamp() == ts);
auto use_current_domain = GENERATE(false, true);
// TODO: this could be formatted with fmt::format, but that comes from the
// internal header spdlog/fmt/fmt.h and should not be used directly. In C++20,
// this can be replaced with std::format.
std::ostringstream section;
section << "- use_current_domain=" << use_current_domain;
SECTION(section.str()) {
TimestampRange ts(0, 2);
auto ctx = std::make_shared<SOMAContext>();
std::string base_uri = "mem://unit-test-add-dense-ndarray";
std::string sub_uri = "mem://unit-test-add-dense-ndarray/sub";
std::string dim_name = "soma_dim_0";
tiledb_datatype_t tiledb_datatype = TILEDB_INT64;
std::string arrow_format = ArrowAdapter::tdb_to_arrow_type(
tiledb_datatype);

auto soma_dense = soma_collection->add_new_dense_ndarray(
"dense_ndarray",
sub_uri,
URIType::absolute,
ctx,
arrow_format,
ArrowTable(
std::move(index_columns.first), std::move(index_columns.second)));
REQUIRE(soma_collection->members_map() == expected_map);
REQUIRE(soma_dense->uri() == sub_uri);
REQUIRE(soma_dense->ctx() == ctx);
REQUIRE(soma_dense->type() == "SOMADenseNDArray");
REQUIRE(soma_dense->is_sparse() == false);
REQUIRE(soma_dense->ndim() == 1);
REQUIRE(soma_dense->shape() == std::vector<int64_t>{DIM_MAX + 1});
REQUIRE(soma_dense->timestamp() == ts);
soma_collection->close();
SOMACollection::create(base_uri, ctx, ts);
std::vector<helper::DimInfo> dim_infos(
{{.name = dim_name,
.tiledb_datatype = tiledb_datatype,
.dim_max = DIM_MAX,
.string_lo = "N/A",
.string_hi = "N/A",
.use_current_domain = use_current_domain}});
auto index_columns = helper::create_column_index_info(dim_infos);

soma_collection = SOMACollection::open(base_uri, OpenMode::read, ctx);
REQUIRE(soma_collection->members_map() == expected_map);
soma_collection->close();
std::map<std::string, SOMAGroupEntry> expected_map{
{"dense_ndarray", SOMAGroupEntry(sub_uri, "SOMAArray")}};

auto soma_collection = SOMACollection::open(
base_uri, OpenMode::write, ctx, ts);
REQUIRE(soma_collection->timestamp() == ts);

if (helper::have_dense_current_domain_support()) {
auto soma_dense = soma_collection->add_new_dense_ndarray(
"dense_ndarray",
sub_uri,
URIType::absolute,
ctx,
arrow_format,
ArrowTable(
std::move(index_columns.first),
std::move(index_columns.second)));
REQUIRE(soma_collection->members_map() == expected_map);
REQUIRE(soma_dense->uri() == sub_uri);
REQUIRE(soma_dense->ctx() == ctx);
REQUIRE(soma_dense->type() == "SOMADenseNDArray");
REQUIRE(soma_dense->is_sparse() == false);
REQUIRE(soma_dense->ndim() == 1);
REQUIRE(soma_dense->shape() == std::vector<int64_t>{DIM_MAX + 1});
REQUIRE(soma_dense->timestamp() == ts);
soma_collection->close();

soma_collection = SOMACollection::open(
base_uri, OpenMode::read, ctx);
REQUIRE(soma_collection->members_map() == expected_map);
soma_collection->close();
}
}
}

TEST_CASE("SOMACollection: add SOMADataFrame") {
Expand Down
Loading

0 comments on commit 22aa800

Please sign in to comment.