Skip to content

Commit

Permalink
query/timestamp wip
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 15, 2024
1 parent 83fd4ff commit faa647a
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 32 deletions.
63 changes: 55 additions & 8 deletions apis/r/R/QueryCondition.R
Original file line number Diff line number Diff line change
Expand Up @@ -162,17 +162,64 @@ parse_query_condition_new <- function(
arrow_type_name <- "utf8"
}

if (arrow_type_name == "timestamp") {
unit <- arrow_field$type$unit()
if (unit == 0) {
arrow_type_name <- "timestamp_s"
} else if (unit == 1) {
arrow_type_name <- "timestamp_ms"
} else if (unit == 2) {
arrow_type_name <- "timestamp_us"
} else if (unit == 3) {
arrow_type_name <- "timestamp_ns"
} else {
.error_function(
"Attribute '", attr_name, "' has unknown unit ",
arrow_field$type$unit, call. = FALSE)
}
}

value = switch(
arrow_type_name,
ascii = rhs_text,
utf8 = rhs_text,
bool = as.logical(rhs_text),
# Problem:

# > t <-as.POSIXct('1970-01-01 01:00:05 UTC')
# > as.numeric(t)
# [1] 21605
# > ?as.POSIXct
# > t <-as.POSIXct('1970-01-01 01:00:05 EST')
# > as.numeric(t)
# [1] 21605
# > t <-as.POSIXct('1970-01-01 01:00:05 UTC', tz="EST")
# > as.numeric(t)
# [1] 21605
# > t <-as.POSIXct('1970-01-01 01:00:05 UTC', tz="UTC")
# > as.numeric(t)
# [1] 3605

# It's not respecting the timezone given in the first argument string.
# Not good.

timestamp_s = as.numeric(as.POSIXct(rhs_text, tz="UTC")), # THIS NEEDS THOUGHT
timestamp_ms = as.numeric(as.POSIXct(rhs_text, tz="UTC")), # THIS NEEDS THOUGHT
timestamp_ns = as.numeric(as.POSIXct(rhs_text, tz="UTC")), # THIS NEEDS THOUGHT
timestamp_us = as.numeric(as.POSIXct(rhs_text, tz="UTC")), # THIS NEEDS THOUGHT
date32 = as.Date(rhs_text),
as.numeric(rhs_text))

spdl::debug("[parseqc] triple name:[{}] value:[{}] type:[{}] op:[{}]",
attr_name,
value,
arrow_type_name,
op_name);

# General case of extracting appropriate value given type info
return(tiledbsoma_query_condition_from_triple(
attr_name = attr_name,
value = switch(
arrow_type_name,
ascii = rhs_text,
utf8 = rhs_text,
bool = as.logical(rhs_text),
date32 = as.POSIXct(rhs_text),
timestamp = as.Date(rhs_text),
as.numeric(rhs_text)),
value = value,
arrow_type_name = arrow_type_name,
op_name = .map_op_to_character(op_name),
qc = tiledbsoma_empty_query_condition(somactx)))
Expand Down
23 changes: 21 additions & 2 deletions apis/r/src/query_condition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,32 @@ void libtiledbsoma_query_condition_from_triple(
uint64_t cond_val_size = sizeof(bool);
query_cond->init(attr_name, (void*)&v, cond_val_size, op);

} else if (arrow_type_name == "timestamp") {
// Arrow timestamp TileDB DATETIME_MS
} else if (arrow_type_name == "timestamp_s") {
int64_t v = static_cast<int64_t>(
Rcpp::as<double>(condition_value));
spdl::debug("ts3 {}", v);
uint64_t cond_val_size = sizeof(int64_t);
query_cond->init(attr_name, (void*)&v, cond_val_size, op);

} else if (arrow_type_name == "timestamp_ms") {
int64_t v = static_cast<int64_t>(
Rcpp::as<double>(condition_value) * 1000);
uint64_t cond_val_size = sizeof(int64_t);
query_cond->init(attr_name, (void*)&v, cond_val_size, op);

} else if (arrow_type_name == "timestamp_us") {
int64_t v = static_cast<int64_t>(
Rcpp::as<double>(condition_value) * 1e6);
uint64_t cond_val_size = sizeof(int64_t);
query_cond->init(attr_name, (void*)&v, cond_val_size, op);

} else if (arrow_type_name == "timestamp_ns") {
// XXX nanotime ...
int64_t v = static_cast<int64_t>(
Rcpp::as<double>(condition_value) * 1e9);
uint64_t cond_val_size = sizeof(int64_t);
query_cond->init(attr_name, (void*)&v, cond_val_size, op);

} else if (arrow_type_name == "date32") {
// Arrow date32 TileDB DATETIME_DAY
int64_t v = static_cast<int64_t>(Rcpp::as<double>(condition_value));
Expand Down
60 changes: 38 additions & 22 deletions apis/r/tests/testthat/test-query-condition.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ test_that("DataFrame Factory", {
value_type = arrow::utf8(),
ordered = TRUE)),
arrow::field("float32", arrow::float32()),
arrow::field("float64", arrow::float64())
arrow::field("float64", arrow::float64()),
# TODO: for a follow-up PR
# arrow::field("timestamp_s", arrow::timestamp(unit="s")),
# arrow::field("timestamp_ms", arrow::timestamp(unit="ms")),
# arrow::field("timestamp_us", arrow::timestamp(unit="us")),
# arrow::field("timestamp_ns", arrow::timestamp(unit="ns"))
arrow::field("timestamp_s", arrow::timestamp(unit="s")),
arrow::field("timestamp_ms", arrow::timestamp(unit="ms")),
arrow::field("timestamp_us", arrow::timestamp(unit="us")),
arrow::field("timestamp_ns", arrow::timestamp(unit="ns"))
# Not supported in libtiledbsoma
# arrow::field("datetime_day", arrow::date32())
)
Expand All @@ -54,10 +54,10 @@ test_that("DataFrame Factory", {
float32 = 1.5:10.5,
float64 = 11.5:20.5,
# TODO: for a follow-up PR
# timestamp_s = as.POSIXct(as.numeric(3600 + 1:10), tz="GMT"),
# timestamp_ms = as.POSIXct(as.numeric(3600*1000 + 1:10), tz="GMT"),
# timestamp_us = as.POSIXct(as.numeric(3600*1000*1000 + 1:10), tz="GMT"),
# timestamp_ns = as.POSIXct(as.numeric(3600*1000*1000*1000 + 1:10), tz="GMT"),
timestamp_s = as.POSIXct(as.numeric(1*3600 + 1:10), tz="UTC"),
timestamp_ms = as.POSIXct(as.numeric(2*3600 + 1:10), tz="UTC"),
timestamp_us = as.POSIXct(as.numeric(3*3600 + 1:10), tz="UTC"),
timestamp_ns = as.POSIXct(as.numeric(4*3600 + 1:10), tz="UTC"),
schema = sch)
sdf$write(tbl)
sdf$close()
Expand Down Expand Up @@ -154,21 +154,37 @@ test_that("DataFrame Factory", {
},
'enum %nin% c("orange", "purple")' = function(df) {
expect_equal(df$soma_joinid, 1:10)
}
},

# TODO: for a follow-up PR
# 'timestamp_s < "1969-12-31 20:01:04 EST"' = function(df) {
# expect_equal(df$soma_joinid, 1:3)
# },
# 'timestamp_ms != "1970-02-11 11:00:05 EST"' = function(df) {
# expect_equal(df$soma_joinid, 1:10)
# },
# 'timestamp_us > "1970-01-01 00:00:01 GMT"' = function(df) {
# expect_equal(df$soma_joinid, 1:10)
# },
# 'timestamp_ns > "1970-01-01 00:00:01 GMT"' = function(df) {
# expect_equal(df$soma_joinid, 1:10)
# }
'timestamp_s < "1970-01-01 01:00:05 UTC"' = function(df) {
expect_equal(df$soma_joinid, 1:4)
},

'timestamp_ms < "1970-01-01 02:00:05 UTC"' = function(df) {
expect_equal(df$soma_joinid, 1:4)
},

'timestamp_us < "1970-01-01 03:00:05 UTC"' = function(df) {
expect_equal(df$soma_joinid, 1:4)
},

'timestamp_ns < "1970-01-01 04:00:05 UTC"' = function(df) {
expect_equal(df$soma_joinid, 1:4)
}

# timestamp_s timestamp_ms timestamp_us timestamp_ns
# 1970-01-01 01:00:01 1970-01-01 02:00:01 1970-01-01 03:00:01 1970-01-01 04:00:01
# 1970-01-01 01:00:02 1970-01-01 02:00:02 1970-01-01 03:00:02 1970-01-01 04:00:02
# 1970-01-01 01:00:03 1970-01-01 02:00:03 1970-01-01 03:00:03 1970-01-01 04:00:03
# 1970-01-01 01:00:04 1970-01-01 02:00:04 1970-01-01 03:00:04 1970-01-01 04:00:04
# 1970-01-01 01:00:05 1970-01-01 02:00:05 1970-01-01 03:00:05 1970-01-01 04:00:05
# 1970-01-01 01:00:06 1970-01-01 02:00:06 1970-01-01 03:00:06 1970-01-01 04:00:06
# 1970-01-01 01:00:07 1970-01-01 02:00:07 1970-01-01 03:00:07 1970-01-01 04:00:07
# 1970-01-01 01:00:08 1970-01-01 02:00:08 1970-01-01 03:00:08 1970-01-01 04:00:08
# 1970-01-01 01:00:09 1970-01-01 02:00:09 1970-01-01 03:00:09 1970-01-01 04:00:09
# 1970-01-01 01:00:10 1970-01-01 02:00:10 1970-01-01 03:00:10 1970-01-01 04:00:10

)

for (query_string in names(good_cases)) {
Expand Down

0 comments on commit faa647a

Please sign in to comment.