Skip to content

Commit

Permalink
[r] Metadata read/write support via libtiledbsoma (#2819)
Browse files Browse the repository at this point in the history
* [r] Metadata read/write support via libtiledbsoma

* Add 'factory' to work on either array or group

* get_all_metadata() now returns a list

* Now with set_metadata including small libtiledbsoma update

Passes all tests

* Use SOMA*::create

* Removing temp skip on resuming dense writes

* Small cleanup

* Remove commented-out old code
  • Loading branch information
eddelbuettel authored Aug 7, 2024
1 parent 86e338f commit 274c6f4
Show file tree
Hide file tree
Showing 19 changed files with 489 additions and 33 deletions.
6 changes: 6 additions & 0 deletions apis/r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,17 @@ export(TileDBArray)
export(TileDBCreateOptions)
export(TileDBGroup)
export(TileDBObject)
export(delete_metadata)
export(extract_dataset)
export(get_all_metadata)
export(get_metadata)
export(get_metadata_num)
export(has_metadata)
export(list_datasets)
export(load_dataset)
export(matrixZeroBasedView)
export(set_log_level)
export(set_metadata)
export(show_package_versions)
export(soma_context)
export(tiledbsoma_stats_disable)
Expand Down
61 changes: 61 additions & 0 deletions apis/r/R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,67 @@ writeArrayFromArrow <- function(uri, naap, nasp, arraytype = "", config = NULL)
invisible(.Call(`_tiledbsoma_writeArrayFromArrow`, uri, naap, nasp, arraytype, config))
}

#' Get number of metadata items
#'
#' Thin wrapper that forwards its arguments to the native routine
#' \code{_tiledbsoma_get_metadata_num}.
#'
#' @param uri The array URI (or group URI; see \code{is_array})
#' @param is_array A logical value; presumably \code{TRUE} when \code{uri}
#' refers to an array and \code{FALSE} when it refers to a group
#' (TODO confirm against the C++ implementation)
#' @param ctxxp An external pointer to the SOMAContext wrapper
#' @return An integer giving the number of metadata items
#' @export
get_metadata_num <- function(uri, is_array, ctxxp) {
.Call(`_tiledbsoma_get_metadata_num`, uri, is_array, ctxxp)
}

#' Read all metadata (as named list)
#'
#' This function currently supports metadata as either a string or an 'int64' (or 'int32').
#' It will error if a different datatype is encountered.
#'
#' @param uri The array URI (or group URI; see \code{is_array})
#' @param is_array A logical value; presumably \code{TRUE} when \code{uri}
#' refers to an array and \code{FALSE} when it refers to a group
#' (TODO confirm against the C++ implementation)
#' @param ctxxp An external pointer to the SOMAContext wrapper
#' @return A list holding the metadata values
#' @export
get_all_metadata <- function(uri, is_array, ctxxp) {
.Call(`_tiledbsoma_get_all_metadata`, uri, is_array, ctxxp)
}

#' Read metadata (as a string)
#'
#' Thin wrapper that forwards its arguments to the native routine
#' \code{_tiledbsoma_get_metadata}.
#'
#' @param uri The array URI (or group URI; see \code{is_array})
#' @param key The array metadata key
#' @param is_array A logical value; presumably \code{TRUE} when \code{uri}
#' refers to an array and \code{FALSE} when it refers to a group
#' (TODO confirm against the C++ implementation)
#' @param ctxxp An external pointer to the SOMAContext wrapper
#' @return A character string with the metadata value stored under \code{key}
#' @export
get_metadata <- function(uri, key, is_array, ctxxp) {
.Call(`_tiledbsoma_get_metadata`, uri, key, is_array, ctxxp)
}

#' Check for metadata given key
#'
#' Thin wrapper that forwards its arguments to the native routine
#' \code{_tiledbsoma_has_metadata}.
#'
#' @param uri The array URI (or group URI; see \code{is_array})
#' @param key The array metadata key
#' @param is_array A logical value; presumably \code{TRUE} when \code{uri}
#' refers to an array and \code{FALSE} when it refers to a group
#' (TODO confirm against the C++ implementation)
#' @param ctxxp An external pointer to the SOMAContext wrapper
#' @return A logical value; presumably \code{TRUE} when metadata exists for
#' \code{key} (TODO confirm against the C++ implementation)
#' @export
has_metadata <- function(uri, key, is_array, ctxxp) {
.Call(`_tiledbsoma_has_metadata`, uri, key, is_array, ctxxp)
}

#' Delete metadata for given key
#'
#' Thin wrapper that forwards its arguments to the native routine
#' \code{_tiledbsoma_delete_metadata}.
#'
#' @param uri The array URI (or group URI; see \code{is_array})
#' @param key The array metadata key
#' @param is_array A logical value; presumably \code{TRUE} when \code{uri}
#' refers to an array and \code{FALSE} when it refers to a group
#' (TODO confirm against the C++ implementation)
#' @param ctxxp An external pointer to the SOMAContext wrapper
#' @return The result of the native call, returned invisibly
#' @export
delete_metadata <- function(uri, key, is_array, ctxxp) {
invisible(.Call(`_tiledbsoma_delete_metadata`, uri, key, is_array, ctxxp))
}

#' Set metadata (as a string)
#'
#' Thin wrapper that forwards its arguments to the native routine
#' \code{_tiledbsoma_set_metadata}.
#'
#' @param uri The array URI (or group URI; see \code{is_array})
#' @param key The array metadata key
#' @param valuesxp The metadata value
#' @param type A character string naming the type of \code{valuesxp};
#' presumably the result of calling \code{class()} on the value
#' (TODO confirm the accepted type names against the C++ implementation)
#' @param is_array A logical value; presumably \code{TRUE} when \code{uri}
#' refers to an array and \code{FALSE} when it refers to a group
#' (TODO confirm against the C++ implementation)
#' @param ctxxp An external pointer to the SOMAContext wrapper
#' @return The result of the native call, returned invisibly
#' @export
set_metadata <- function(uri, key, valuesxp, type, is_array, ctxxp) {
invisible(.Call(`_tiledbsoma_set_metadata`, uri, key, valuesxp, type, is_array, ctxxp))
}

# Internal (not exported) wrapper: calls the native routine
# `_tiledbsoma_reindex_create` with no arguments and returns its result.
reindex_create <- function() {
.Call(`_tiledbsoma_reindex_create`)
}
Expand Down
6 changes: 0 additions & 6 deletions apis/r/R/SOMAOpen.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,6 @@ SOMAOpen <- function(uri, mode = "READ", platform_config = NULL,
stop("URI '", uri, "' is not a TileDB SOMA object.", call. = FALSE)
}

# Alternative: go via tiledb-r but needs to set up platform_config + ctx first
# TODO set to ctx from config and/or ctx for initial open too
#arr <- tiledb::tiledb_array(uri) # sadly this currently returns it closed
#arr <- tiledb::tiledb_array_open(arr, "READ") # TODO: just get it in opened state
#obj <- tiledb::tiledb_get_metadata(arr, "soma_object_type")

switch(obj,
SOMACollection = SOMACollectionOpen(uri, mode=mode, platform_config=platform_config, tiledbsoma_ctx=tiledbsoma_ctx, tiledb_timestamp=tiledb_timestamp),
SOMADataFrame = SOMADataFrameOpen(uri, mode=mode, platform_config=platform_config, tiledbsoma_ctx=tiledbsoma_ctx, tiledb_timestamp=tiledb_timestamp),
Expand Down
16 changes: 7 additions & 9 deletions apis/r/R/TileDBArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ TileDBArray <- R6::R6Class(
}

## TODO -- cannot do this here: it is needed for the array case but does not work for the data frame case
#tdbtype <- tiledb::datatype(tiledb::attrs(tiledb::schema(private$.tiledb_array))[[1]])
#private$.type <- arrow_type_from_tiledb_type(tdbtype)

private$update_metadata_cache()
Expand Down Expand Up @@ -101,13 +100,12 @@ TileDBArray <- R6::R6Class(

private$check_open_for_write()

dev_null <- mapply(
FUN = tiledb::tiledb_put_metadata,
key = names(metadata),
val = metadata,
MoreArgs = list(arr = self$object),
SIMPLIFY = FALSE
)
for (nm in names(metadata)) {
#spdl::warn("[set_metadata] key {}", nm)
val <- metadata[[nm]]
spdl::debug("[set_metadata] setting key {} to {} ({})", nm, val, class(val))
set_metadata(self$uri, nm, val, class(val), TRUE, soma_context())
}

dev_null <- mapply(
FUN = private$add_cached_metadata,
Expand Down Expand Up @@ -358,7 +356,7 @@ TileDBArray <- R6::R6Class(
array_handle <- tiledb::tiledb_array_open(array_handle, type = "READ")
}

private$.metadata_cache <- tiledb::tiledb_get_all_metadata(array_handle)
private$.metadata_cache <- get_all_metadata(self$uri, TRUE, soma_context())

if (private$.mode == "WRITE") {
tiledb::tiledb_array_close(array_handle)
Expand Down
18 changes: 18 additions & 0 deletions apis/r/man/delete_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions apis/r/man/get_all_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions apis/r/man/get_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions apis/r/man/get_metadata_num.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions apis/r/man/has_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions apis/r/man/set_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

88 changes: 88 additions & 0 deletions apis/r/src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,88 @@ BEGIN_RCPP
return R_NilValue;
END_RCPP
}
// get_metadata_num
// Declaration of the C++ function wrapped by the entry point below.
int32_t get_metadata_num(std::string& uri, bool is_array, Rcpp::XPtr<somactx_wrap_t> ctxxp);
// .Call entry point: converts the three SEXP arguments to their C++ types,
// calls get_metadata_num() and wraps its int32_t result back into a SEXP.
RcppExport SEXP _tiledbsoma_get_metadata_num(SEXP uriSEXP, SEXP is_arraySEXP, SEXP ctxxpSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< bool >::type is_array(is_arraySEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
rcpp_result_gen = Rcpp::wrap(get_metadata_num(uri, is_array, ctxxp));
return rcpp_result_gen;
END_RCPP
}
// get_all_metadata
// Declaration of the C++ function wrapped by the entry point below.
Rcpp::List get_all_metadata(std::string& uri, bool is_array, Rcpp::XPtr<somactx_wrap_t> ctxxp);
// .Call entry point: converts the three SEXP arguments to their C++ types,
// calls get_all_metadata() and wraps the resulting Rcpp::List into a SEXP.
RcppExport SEXP _tiledbsoma_get_all_metadata(SEXP uriSEXP, SEXP is_arraySEXP, SEXP ctxxpSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< bool >::type is_array(is_arraySEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
rcpp_result_gen = Rcpp::wrap(get_all_metadata(uri, is_array, ctxxp));
return rcpp_result_gen;
END_RCPP
}
// get_metadata
// Declaration of the C++ function wrapped by the entry point below.
std::string get_metadata(std::string& uri, std::string& key, bool is_array, Rcpp::XPtr<somactx_wrap_t> ctxxp);
// .Call entry point: converts the four SEXP arguments to their C++ types,
// calls get_metadata() and wraps the resulting std::string into a SEXP.
RcppExport SEXP _tiledbsoma_get_metadata(SEXP uriSEXP, SEXP keySEXP, SEXP is_arraySEXP, SEXP ctxxpSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< std::string& >::type key(keySEXP);
Rcpp::traits::input_parameter< bool >::type is_array(is_arraySEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
rcpp_result_gen = Rcpp::wrap(get_metadata(uri, key, is_array, ctxxp));
return rcpp_result_gen;
END_RCPP
}
// has_metadata
// Declaration of the C++ function wrapped by the entry point below.
bool has_metadata(std::string& uri, std::string& key, bool is_array, Rcpp::XPtr<somactx_wrap_t> ctxxp);
// .Call entry point: converts the four SEXP arguments to their C++ types,
// calls has_metadata() and wraps the resulting bool into a SEXP.
RcppExport SEXP _tiledbsoma_has_metadata(SEXP uriSEXP, SEXP keySEXP, SEXP is_arraySEXP, SEXP ctxxpSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< std::string& >::type key(keySEXP);
Rcpp::traits::input_parameter< bool >::type is_array(is_arraySEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
rcpp_result_gen = Rcpp::wrap(has_metadata(uri, key, is_array, ctxxp));
return rcpp_result_gen;
END_RCPP
}
// delete_metadata
// Declaration of the C++ function wrapped by the entry point below.
void delete_metadata(std::string& uri, std::string& key, bool is_array, Rcpp::XPtr<somactx_wrap_t> ctxxp);
// .Call entry point: converts the four SEXP arguments to their C++ types and
// calls delete_metadata(); the wrapped function is void, so R_NilValue is
// returned to R.
RcppExport SEXP _tiledbsoma_delete_metadata(SEXP uriSEXP, SEXP keySEXP, SEXP is_arraySEXP, SEXP ctxxpSEXP) {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< std::string& >::type key(keySEXP);
Rcpp::traits::input_parameter< bool >::type is_array(is_arraySEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
delete_metadata(uri, key, is_array, ctxxp);
return R_NilValue;
END_RCPP
}
// set_metadata
// Declaration of the C++ function wrapped by the entry point below.
void set_metadata(std::string& uri, std::string& key, SEXP valuesxp, std::string& type, bool is_array, Rcpp::XPtr<somactx_wrap_t> ctxxp);
// .Call entry point: converts the six SEXP arguments to their C++ types
// (valuesxp is passed through as a raw SEXP) and calls set_metadata(); the
// wrapped function is void, so R_NilValue is returned to R.
RcppExport SEXP _tiledbsoma_set_metadata(SEXP uriSEXP, SEXP keySEXP, SEXP valuesxpSEXP, SEXP typeSEXP, SEXP is_arraySEXP, SEXP ctxxpSEXP) {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string& >::type uri(uriSEXP);
Rcpp::traits::input_parameter< std::string& >::type key(keySEXP);
Rcpp::traits::input_parameter< SEXP >::type valuesxp(valuesxpSEXP);
Rcpp::traits::input_parameter< std::string& >::type type(typeSEXP);
Rcpp::traits::input_parameter< bool >::type is_array(is_arraySEXP);
Rcpp::traits::input_parameter< Rcpp::XPtr<somactx_wrap_t> >::type ctxxp(ctxxpSEXP);
set_metadata(uri, key, valuesxp, type, is_array, ctxxp);
return R_NilValue;
END_RCPP
}
// reindex_create
Rcpp::XPtr<tdbs::IntIndexer> reindex_create();
RcppExport SEXP _tiledbsoma_reindex_create() {
Expand Down Expand Up @@ -346,6 +428,12 @@ static const R_CallMethodDef CallEntries[] = {
{"_tiledbsoma_createSOMAContext", (DL_FUNC) &_tiledbsoma_createSOMAContext, 1},
{"_tiledbsoma_createSchemaFromArrow", (DL_FUNC) &_tiledbsoma_createSchemaFromArrow, 8},
{"_tiledbsoma_writeArrayFromArrow", (DL_FUNC) &_tiledbsoma_writeArrayFromArrow, 5},
{"_tiledbsoma_get_metadata_num", (DL_FUNC) &_tiledbsoma_get_metadata_num, 3},
{"_tiledbsoma_get_all_metadata", (DL_FUNC) &_tiledbsoma_get_all_metadata, 3},
{"_tiledbsoma_get_metadata", (DL_FUNC) &_tiledbsoma_get_metadata, 4},
{"_tiledbsoma_has_metadata", (DL_FUNC) &_tiledbsoma_has_metadata, 4},
{"_tiledbsoma_delete_metadata", (DL_FUNC) &_tiledbsoma_delete_metadata, 4},
{"_tiledbsoma_set_metadata", (DL_FUNC) &_tiledbsoma_set_metadata, 6},
{"_tiledbsoma_reindex_create", (DL_FUNC) &_tiledbsoma_reindex_create, 0},
{"_tiledbsoma_reindex_map", (DL_FUNC) &_tiledbsoma_reindex_map, 2},
{"_tiledbsoma_reindex_lookup", (DL_FUNC) &_tiledbsoma_reindex_lookup, 2},
Expand Down
29 changes: 21 additions & 8 deletions apis/r/src/arrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Rcpp::XPtr<somactx_wrap_t> createSOMAContext(Rcpp::Nullable<Rcpp::CharacterVecto
void createSchemaFromArrow(const std::string& uri, naxpSchema nasp, naxpArray nadimap, naxpSchema nadimsp,
bool sparse, std::string datatype, Rcpp::List pclst, Rcpp::XPtr<somactx_wrap_t> ctxxp) {


//struct ArrowArray* ap = (struct ArrowArray*) R_ExternalPtrAddr(naap);
//struct ArrowSchema* sp = (struct ArrowSchema*) R_ExternalPtrAddr(nasp);
//
Expand Down Expand Up @@ -118,14 +119,26 @@ void createSchemaFromArrow(const std::string& uri, naxpSchema nasp, naxpArray na
Rcpp::stop(tfm::format("Error: Array '%s' already exists", uri));
}

// create the ArraySchema
auto as = tdbs::ArrowAdapter::tiledb_schema_from_arrow_schema(ctx, std::move(schema),
std::pair(std::move(dimarr),
std::move(dimsch)),
datatype, sparse,
pltcfg);
// Create the schema at the given URI
tiledb::Array::create(uri, as);
if (datatype == "SOMADataFrame") {
tdbs::SOMADataFrame::create(uri, std::move(schema),
std::pair(std::move(dimarr), std::move(dimsch)),
sctx, pltcfg);
} else if (datatype == "SOMASparseNDArray") {
// for arrays n_children will be three as we have two dims and a data col
std::string datacoltype = sp->children[sp->n_children-1]->format;
tdbs::SOMASparseNDArray::create(uri, datacoltype,
std::pair(std::move(dimarr), std::move(dimsch)),
sctx, pltcfg);
} else if (datatype == "SOMADenseNDArray") {
// for arrays n_children will be three as we have two dims and a data col
std::string datacoltype = sp->children[sp->n_children-1]->format;
tdbs::SOMADenseNDArray::create(uri, datacoltype,
std::pair(std::move(dimarr), std::move(dimsch)),
sctx, pltcfg);
} else {
Rcpp::stop(tfm::format("Error: Invalid SOMA type_argument '%s'", datatype));
}

}


Expand Down
Loading

0 comments on commit 274c6f4

Please sign in to comment.