Skip to content

Commit

Permalink
[r] Add Arrow to R type mapper (#3161)
Browse files Browse the repository at this point in the history
* [r] Add Arrow to R type mapper

Add new `r_type_from_arrow_type()` function to get R types from Arrow
types. This function takes an Arrow schema, field, or datatype and
returns the corresponding R type (as indicated by `typeof()`) (or types
for a schema)

[SC-57387](https://app.shortcut.com/tiledb-inc/story/57387)

* Update docs

* Update changelog
Bump develop version
  • Loading branch information
mojaveazure authored Oct 10, 2024
1 parent cced6ea commit 4d37aa9
Show file tree
Hide file tree
Showing 7 changed files with 234 additions and 5 deletions.
2 changes: 1 addition & 1 deletion apis/r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Description: Interface for working with 'TileDB'-based Stack of Matrices,
like those commonly used for single cell data analysis. It is documented at
<https://github.com/single-cell-data>; a formal specification available is at
<https://github.com/single-cell-data/SOMA/blob/main/abstract_specification.md>.
Version: 1.15.99.2
Version: 1.15.99.3
Authors@R: c(
person(given = "Aaron", family = "Wolen",
role = c("cre", "aut"), email = "aaron@tiledb.com",
Expand Down
4 changes: 4 additions & 0 deletions apis/r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ S3method(itertools::hasNext,CoordsStrider)
S3method(length,CoordsStrider)
S3method(length,MappingBase)
S3method(names,MappingBase)
S3method(r_type_from_arrow_type,DataType)
S3method(r_type_from_arrow_type,Field)
S3method(r_type_from_arrow_type,Schema)
S3method(write_soma,Assay)
S3method(write_soma,DataFrame)
S3method(write_soma,DimReduc)
Expand Down Expand Up @@ -84,6 +87,7 @@ export(has_metadata)
export(list_datasets)
export(load_dataset)
export(matrixZeroBasedView)
export(r_type_from_arrow_type)
export(set_log_level)
export(set_metadata)
export(show_package_versions)
Expand Down
1 change: 1 addition & 0 deletions apis/r/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* Push `schema` accessor down to `libtiledbsoma` [#3079](https://github.com/single-cell-data/TileDB-SOMA/pull/3079)
* Handle `numeric` coords properly when reading arrays
* Remove two more `tiledb::schema` callsites [#3160](https://github.com/single-cell-data/TileDB-SOMA/pull/3160)
* Add new Arrow-to-R type mapper

# tiledbsoma 1.14.1

Expand Down
3 changes: 1 addition & 2 deletions apis/r/R/TileDBArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,7 @@ TileDBArray <- R6::R6Class(
#' @description Retrieve the array schema as an Arrow schema (lifecycle: maturing)
#' @return A [`arrow::schema`] object
schema = function() {
arrow::as_schema(
c_schema(self$uri, private$.soma_context));
return(arrow::as_schema(c_schema(self$uri, private$.soma_context)))
},

#' @description Retrieve the array schema as TileDB schema (lifecycle: maturing)
Expand Down
89 changes: 87 additions & 2 deletions apis/r/R/utils-arrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,92 @@ arrow_type_from_tiledb_type <- function(x) {
)
}

#' Get the \R Type from an Arrow Type
#'
#' Get an \R \link[base:typeof]{type} from an Arrow type. This function is
#' equivalent to \code{\link[base]{typeof}()} rather than
#' \code{\link[base]{mode}()} or \code{\link[base]{class}()}, and returns the
#' equivalent \strong{type}. For example, the equivalent \R type to an Arrow
#' \link[arrow]{dictionary} is \dQuote{\code{integer}}, not
#' \dQuote{\code{factor}}; likewise, the equivalent \R type to an Arrow 64-bit
#' integer is \dQuote{\code{double}}
#'
#' @param x An \CRANpkg{Arrow} \link[arrow:Schema]{schema},
#' \link[arrow:Field]{field}, or \link[arrow:infer_type]{data type}
#'
#' @return If \code{x} is a \link[arrow:infer_type]{data type}, a single
#' character value giving the \R \link[base:typeof]{type} of \code{x}; if no
#' corresponding \R type, returns the \CRANpkg{Arrow} type name
#'
#' @return If \code{x} is a \link[arrow:Field]{field}, a single named character
#' vector with the name being the field name and the value being the \R
#' \link[base:typeof]{type}
#'
#' @return If \code{x} is a \link[arrow:Schema]{schema}, a named vector where
#' the names are field names and the values are the \R \link[base:typeof]{types}
#' of each field
#'
#' @keywords internal
#'
#' @export
#'
#' @seealso \code{\link[base]{typeof}()}
#'
r_type_from_arrow_type <- function(x) UseMethod('r_type_from_arrow_type')

#' @rdname r_type_from_arrow_type
#'
#' @method r_type_from_arrow_type Schema
#' @export
#'
r_type_from_arrow_type.Schema <- function(x) {
return(vapply(
X = x$names,
FUN = function(f) r_type_from_arrow_type(x[[f]]),
FUN.VALUE = character(1L),
USE.NAMES = TRUE
))
}

#' @rdname r_type_from_arrow_type
#'
#' @method r_type_from_arrow_type Field
#' @export
#'
r_type_from_arrow_type.Field <- function(x) {
tt <- r_type_from_arrow_type(x$type)
names(x = tt) <- x$name
return(tt)
}

#' @rdname r_type_from_arrow_type
#'
#' @method r_type_from_arrow_type DataType
#' @export
#'
r_type_from_arrow_type.DataType <- function(x) {
# Types are equivalent to `typeof()`, not `mode()` or `class()`
return(switch(
EXPR = x$name,
int8 = ,
int16 = ,
int32 = ,
dictionary = ,
uint8 = ,
uint16 = ,
uint32 = 'integer',
int64 = ,
uint64 = ,
date32 = ,
timestamp = ,
float = 'double',
bool = 'logical',
utf8 = ,
large_utf8 = 'character',
x$name
))
}

#' Retrieve limits for Arrow types
#' @importFrom bit64 lim.integer64
#' @noRd
Expand Down Expand Up @@ -174,10 +260,9 @@ arrow_field_from_tiledb_dim <- function(x) {
## With a nod to Kevin Ushey
#' @noRd
yoink <- function(package, symbol) {
do.call(":::", list(package, symbol))
do.call(":::", list(package, symbol))
}


#' Create an Arrow field from a TileDB attribute
#' @noRd
arrow_field_from_tiledb_attr <- function(x, arrptr=NULL) {
Expand Down
47 changes: 47 additions & 0 deletions apis/r/man/r_type_from_arrow_type.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

93 changes: 93 additions & 0 deletions apis/r/tests/testthat/test-r-arrow-types.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
test_that("Arrow to R types: data type", {
skip_if(!extended_tests())
skip_if_not_installed('arrow')

ints <- apply(
expand.grid(c('', 'u'), 'int', c('8', '16', '32')),
MARGIN = 1L,
FUN = paste,
collapse = ''
)
for (i in c(ints, 'dictionary')) {
f <- get(i, envir = asNamespace('arrow'))
expect_type(rt <- r_type_from_arrow_type(f()), 'character')
expect_length(rt, 1L)
expect_null(names(rt))
expect_identical(
rt,
'integer',
label = sprintf('r_type_from_arrow_type(arrow::%s())', i)
)
}

dbls <- c('int64', 'uint64', 'date32', 'timestamp' ,'float', 'float32')
for (i in dbls) {
f <- get(i, envir = asNamespace('arrow'))
expect_type(rt <- r_type_from_arrow_type(f()), 'character')
expect_length(rt, 1L)
expect_null(names(rt))
expect_identical(
rt,
'double',
label = sprintf('r_type_from_arrow_type(arrow::%s())', i)
)
}

for (i in c('bool', 'boolean')) {
f <- get(i, envir = asNamespace('arrow'))
expect_type(rt <- r_type_from_arrow_type(f()), 'character')
expect_length(rt, 1L)
expect_null(names(rt))
expect_identical(
rt,
'logical',
label = sprintf('r_type_from_arrow_type(arrow::%s())', i)
)
}
for (i in c('utf8', 'string', 'large_utf8')) {
f <- get(i, envir = asNamespace('arrow'))
expect_type(rt <- r_type_from_arrow_type(f()), 'character')
expect_length(rt, 1L)
expect_null(names(rt))
expect_identical(
rt,
'character',
label = sprintf('r_type_from_arrow_type(arrow::%s())', i)
)
}
})

test_that("Arrow to R types: field", {
skip_if(!extended_tests())
skip_if_not_installed('arrow')

field <- arrow::field(name = random_name(), type = arrow::int8())
expect_type(rt <- r_type_from_arrow_type(field), 'character')
expect_length(rt, 1L)
expect_named(rt, field$name)
expect_equivalent(rt, 'integer')
})

test_that("Arrow to R types: schema", {
skip_if(!extended_tests())

asch <- create_arrow_schema()
expect_type(rt <- r_type_from_arrow_type(asch), 'character')
expect_length(rt, length(asch))
expect_named(rt, asch$names)
for (fn in names(rt)) {
et <- switch(
EXPR = fn,
int_column = 'integer',
soma_joinid = 'double',
float_column = 'double',
string_column = 'character'
)
expect_equivalent(
rt[fn],
et,
label = sprintf('r_type_from_arrow_type(schema[[%s]])', fn),
expected.label = dQuote(et, FALSE)
)
}
})

0 comments on commit 4d37aa9

Please sign in to comment.