diff --git a/apis/r/DESCRIPTION b/apis/r/DESCRIPTION index c688df1961..ed931b0f33 100644 --- a/apis/r/DESCRIPTION +++ b/apis/r/DESCRIPTION @@ -6,7 +6,7 @@ Description: Interface for working with 'TileDB'-based Stack of Matrices, like those commonly used for single cell data analysis. It is documented at ; a formal specification available is at . -Version: 1.15.99.2 +Version: 1.15.99.3 Authors@R: c( person(given = "Aaron", family = "Wolen", role = c("cre", "aut"), email = "aaron@tiledb.com", diff --git a/apis/r/NAMESPACE b/apis/r/NAMESPACE index 522816de8a..38be0a27ee 100644 --- a/apis/r/NAMESPACE +++ b/apis/r/NAMESPACE @@ -11,6 +11,9 @@ S3method(itertools::hasNext,CoordsStrider) S3method(length,CoordsStrider) S3method(length,MappingBase) S3method(names,MappingBase) +S3method(r_type_from_arrow_type,DataType) +S3method(r_type_from_arrow_type,Field) +S3method(r_type_from_arrow_type,Schema) S3method(write_soma,Assay) S3method(write_soma,DataFrame) S3method(write_soma,DimReduc) @@ -84,6 +87,7 @@ export(has_metadata) export(list_datasets) export(load_dataset) export(matrixZeroBasedView) +export(r_type_from_arrow_type) export(set_log_level) export(set_metadata) export(show_package_versions) diff --git a/apis/r/NEWS.md b/apis/r/NEWS.md index 72006387a6..912dcdad8c 100644 --- a/apis/r/NEWS.md +++ b/apis/r/NEWS.md @@ -9,6 +9,7 @@ * Push `schema` accessor down to `libtiledbsoma` [#3079](https://github.com/single-cell-data/TileDB-SOMA/pull/3079) * Handle `numeric` coords properly when reading arrays * Remove two more `tiledb::schema` callsites [#3160](https://github.com/single-cell-data/TileDB-SOMA/pull/3160) +* Add new Arrow-to-R type mapper # tiledbsoma 1.14.1 diff --git a/apis/r/R/TileDBArray.R b/apis/r/R/TileDBArray.R index 2d4ec6aa6c..54ece1450d 100644 --- a/apis/r/R/TileDBArray.R +++ b/apis/r/R/TileDBArray.R @@ -132,8 +132,7 @@ TileDBArray <- R6::R6Class( #' @description Retrieve the array schema as an Arrow schema (lifecycle: maturing) #' @return A [`arrow::schema`] object schema = function() { - arrow::as_schema( - c_schema(self$uri, private$.soma_context)); + return(arrow::as_schema(c_schema(self$uri, private$.soma_context))) }, #' @description Retrieve the array schema as TileDB schema (lifecycle: maturing) diff --git a/apis/r/R/utils-arrow.R b/apis/r/R/utils-arrow.R index a7d3d4df3e..ac429720f7 100644 --- a/apis/r/R/utils-arrow.R +++ b/apis/r/R/utils-arrow.R @@ -114,6 +114,92 @@ arrow_type_from_tiledb_type <- function(x) { ) } +#' Get the \R Type from an Arrow Type +#' +#' Get an \R \link[base:typeof]{type} from an Arrow type. This function is +#' equivalent to \code{\link[base]{typeof}()} rather than +#' \code{\link[base]{mode}()} or \code{\link[base]{class}()}, and returns the +#' equivalent \strong{type}. For example, the equivalent \R type to an Arrow +#' \link[arrow]{dictionary} is \dQuote{\code{integer}}, not +#' \dQuote{\code{factor}}; likewise, the equivalent \R type to an Arrow 64-bit +#' integer is \dQuote{\code{double}} +#' +#' @param x An \CRANpkg{Arrow} \link[arrow:Schema]{schema}, +#' \link[arrow:Field]{field}, or \link[arrow:infer_type]{data type} +#' +#' @return If \code{x} is a \link[arrow:infer_type]{data type}, a single +#' character value giving the \R \link[base:typeof]{type} of \code{x}; if no +#' corresponding \R type, returns the \CRANpkg{Arrow} type name +#' +#' @return If \code{x} is a \link[arrow:Field]{field}, a single named character +#' vector with the name being the field name and the value being the \R +#' \link[base:typeof]{type} +#' +#' @return If \code{x} is a \link[arrow:Schema]{schema}, a named vector where +#' the names are field names and the values are the \R \link[base:typeof]{types} +#' of each field +#' +#' @keywords internal +#' +#' @export +#' +#' @seealso \code{\link[base]{typeof}()} +#' +r_type_from_arrow_type <- function(x) UseMethod('r_type_from_arrow_type') + +#' @rdname r_type_from_arrow_type +#' +#' @method r_type_from_arrow_type Schema +#' @export +#' +r_type_from_arrow_type.Schema <- function(x) { + return(vapply( + X = x$names, + FUN = function(f) r_type_from_arrow_type(x[[f]]), + FUN.VALUE = character(1L), + USE.NAMES = TRUE + )) +} + +#' @rdname r_type_from_arrow_type +#' +#' @method r_type_from_arrow_type Field +#' @export +#' +r_type_from_arrow_type.Field <- function(x) { + tt <- r_type_from_arrow_type(x$type) + names(x = tt) <- x$name + return(tt) +} + +#' @rdname r_type_from_arrow_type +#' +#' @method r_type_from_arrow_type DataType +#' @export +#' +r_type_from_arrow_type.DataType <- function(x) { + # Types are equivalent to `typeof()`, not `mode()` or `class()` + return(switch( + EXPR = x$name, + int8 = , + int16 = , + int32 = , + dictionary = , + uint8 = , + uint16 = , + uint32 = 'integer', + int64 = , + uint64 = , + date32 = , + timestamp = , + float = 'double', + bool = 'logical', + utf8 = , + large_utf8 = 'character', + x$name + )) +} + #' Retrieve limits for Arrow types #' @importFrom bit64 lim.integer64 #' @noRd @@ -174,10 +260,9 @@ arrow_field_from_tiledb_dim <- function(x) { ## With a nod to Kevin Ushey #' @noRd yoink <- function(package, symbol) { - do.call(":::", list(package, symbol)) + do.call(":::", list(package, symbol)) } - #' Create an Arrow field from a TileDB attribute #' @noRd arrow_field_from_tiledb_attr <- function(x, arrptr=NULL) { diff --git a/apis/r/man/r_type_from_arrow_type.Rd b/apis/r/man/r_type_from_arrow_type.Rd new file mode 100644 index 0000000000..4bb62a59c7 --- /dev/null +++ b/apis/r/man/r_type_from_arrow_type.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-arrow.R +\name{r_type_from_arrow_type} +\alias{r_type_from_arrow_type} +\alias{r_type_from_arrow_type.Schema} +\alias{r_type_from_arrow_type.Field} +\alias{r_type_from_arrow_type.DataType} +\title{Get the \R Type from an Arrow Type} +\usage{ +r_type_from_arrow_type(x) + +\method{r_type_from_arrow_type}{Schema}(x) + +\method{r_type_from_arrow_type}{Field}(x) + +\method{r_type_from_arrow_type}{DataType}(x) +} +\arguments{ +\item{x}{An \CRANpkg{Arrow} \link[arrow:Schema]{schema}, +\link[arrow:Field]{field}, or \link[arrow:infer_type]{data type}} +} +\value{ +If \code{x} is a \link[arrow:infer_type]{data type}, a single +character value giving the \R \link[base:typeof]{type} of \code{x}; if no +corresponding \R type, returns the \CRANpkg{Arrow} type name + +If \code{x} is a \link[arrow:Field]{field}, a single named character +vector with the name being the field name and the value being the \R +\link[base:typeof]{type} + +If \code{x} is a \link[arrow:Schema]{schema}, a named vector where +the names are field names and the values are the \R \link[base:typeof]{types} +of each field +} +\description{ +Get an \R \link[base:typeof]{type} from an Arrow type. This function is +equivalent to \code{\link[base]{typeof}()} rather than +\code{\link[base]{mode}()} or \code{\link[base]{class}()}, and returns the +equivalent \strong{type}. For example, the equivalent \R type to an Arrow +\link[arrow]{dictionary} is \dQuote{\code{integer}}, not +\dQuote{\code{factor}}; likewise, the equivalent \R type to an Arrow 64-bit +integer is \dQuote{\code{double}} +} +\seealso{ +\code{\link[base]{typeof}()} +} +\keyword{internal} diff --git a/apis/r/tests/testthat/test-r-arrow-types.R b/apis/r/tests/testthat/test-r-arrow-types.R new file mode 100644 index 0000000000..95b5e12617 --- /dev/null +++ b/apis/r/tests/testthat/test-r-arrow-types.R @@ -0,0 +1,93 @@ +test_that("Arrow to R types: data type", { + skip_if(!extended_tests()) + skip_if_not_installed('arrow') + + ints <- apply( + expand.grid(c('', 'u'), 'int', c('8', '16', '32')), + MARGIN = 1L, + FUN = paste, + collapse = '' + ) + for (i in c(ints, 'dictionary')) { + f <- get(i, envir = asNamespace('arrow')) + expect_type(rt <- r_type_from_arrow_type(f()), 'character') + expect_length(rt, 1L) + expect_null(names(rt)) + expect_identical( + rt, + 'integer', + label = sprintf('r_type_from_arrow_type(arrow::%s())', i) + ) + } + + dbls <- c('int64', 'uint64', 'date32', 'timestamp' ,'float', 'float32') + for (i in dbls) { + f <- get(i, envir = asNamespace('arrow')) + expect_type(rt <- r_type_from_arrow_type(f()), 'character') + expect_length(rt, 1L) + expect_null(names(rt)) + expect_identical( + rt, + 'double', + label = sprintf('r_type_from_arrow_type(arrow::%s())', i) + ) + } + + for (i in c('bool', 'boolean')) { + f <- get(i, envir = asNamespace('arrow')) + expect_type(rt <- r_type_from_arrow_type(f()), 'character') + expect_length(rt, 1L) + expect_null(names(rt)) + expect_identical( + rt, + 'logical', + label = sprintf('r_type_from_arrow_type(arrow::%s())', i) + ) + } + for (i in c('utf8', 'string', 'large_utf8')) { + f <- get(i, envir = asNamespace('arrow')) + expect_type(rt <- r_type_from_arrow_type(f()), 'character') + expect_length(rt, 1L) + expect_null(names(rt)) + expect_identical( + rt, + 'character', + label = sprintf('r_type_from_arrow_type(arrow::%s())', i) + ) + } +}) + +test_that("Arrow to R types: field", { + skip_if(!extended_tests()) + skip_if_not_installed('arrow') + + field <- arrow::field(name = random_name(), type = arrow::int8()) + expect_type(rt <- r_type_from_arrow_type(field), 'character') + expect_length(rt, 1L) + expect_named(rt, field$name) + expect_equivalent(rt, 'integer') +}) + +test_that("Arrow to R types: schema", { + skip_if(!extended_tests()) + + asch <- create_arrow_schema() + expect_type(rt <- r_type_from_arrow_type(asch), 'character') + expect_length(rt, length(asch)) + expect_named(rt, asch$names) + for (fn in names(rt)) { + et <- switch( + EXPR = fn, + int_column = 'integer', + soma_joinid = 'double', + float_column = 'double', + string_column = 'character' + ) + expect_equivalent( + rt[fn], + et, + label = sprintf('r_type_from_arrow_type(schema[[%s]])', fn), + expected.label = dQuote(et, FALSE) + ) + } +})