-
Notifications
You must be signed in to change notification settings - Fork 58
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement expand and complete (#225)
Also bump minimum data.table version to 1.13.0
- Loading branch information
1 parent
9c83443
commit 3360f20
Showing
13 changed files
with
383 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#' Complete a data frame with missing combinations of data | ||
#' | ||
#' @description | ||
#' This is a method for the tidyr `complete()` generic. This is a wrapper | ||
#' around `dtplyr` translations for `expand()`, `full_join()`, and `replace_na()` | ||
#' that's useful for completing missing combinations of data. | ||
#' | ||
#' @param data A [lazy_dt()]. | ||
#' @inheritParams tidyr::complete | ||
#' @examples | ||
#' library(tidyr) | ||
#' tbl <- tibble(x = 1:2, y = 1:2, z = 3:4) | ||
#' dt <- lazy_dt(tbl) | ||
#' | ||
#' dt %>% | ||
#' complete(x, y) | ||
#' | ||
#' dt %>% | ||
#' complete(x, y, fill = list(z = 10L)) | ||
# exported onLoad | ||
complete.dtplyr_step <- function(data, ..., fill = list()) { | ||
dots <- enquos(...) | ||
dots <- dots[!vapply(dots, quo_is_null, logical(1))] | ||
if (length(dots) == 0) { | ||
return(data) | ||
} | ||
|
||
full <- tidyr::expand(data, !!!dots) | ||
full <- dplyr::full_join(full, data, by = full$vars) | ||
full <- tidyr::replace_na(full, replace = fill) | ||
full | ||
} | ||
|
||
# exported onLoad | ||
complete.data.table <- function(data, ..., fill = list()) { | ||
data <- lazy_dt(data) | ||
tidyr::complete(data, ..., fill = fill) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#' Expand data frame to include all possible combinations of values. | ||
#' | ||
#' @description | ||
#' This is a method for the tidyr `expand()` generic. It is translated to | ||
#' [data.table::CJ()]. | ||
#' | ||
#' @param data A [lazy_dt()]. | ||
#' @inheritParams tidyr::expand | ||
#' @examples | ||
#' library(tidyr) | ||
#' | ||
#' fruits <- lazy_dt(tibble( | ||
#' type = c("apple", "orange", "apple", "orange", "orange", "orange"), | ||
#' year = c(2010, 2010, 2012, 2010, 2010, 2012), | ||
#' size = factor( | ||
#' c("XS", "S", "M", "S", "S", "M"), | ||
#' levels = c("XS", "S", "M", "L") | ||
#' ), | ||
#' weights = rnorm(6, as.numeric(size) + 2) | ||
#' )) | ||
#' | ||
#' # All possible combinations --------------------------------------- | ||
#' # Note that all defined, but not necessarily present, levels of the | ||
#' # factor variable `size` are retained. | ||
#' fruits %>% expand(type) | ||
#' fruits %>% expand(type, size) | ||
#' fruits %>% expand(type, size, year) | ||
#' | ||
#' # Other uses ------------------------------------------------------- | ||
#' fruits %>% expand(type, size, 2010:2012) | ||
#' | ||
#' # Use `anti_join()` to determine which observations are missing | ||
#' all <- fruits %>% expand(type, size, year) | ||
#' all | ||
#' all %>% dplyr::anti_join(fruits) | ||
#' | ||
#' # Use with `right_join()` to fill in missing rows | ||
#' fruits %>% dplyr::right_join(all) | ||
# exported onLoad | ||
expand.dtplyr_step <- function(data, ..., .name_repair = "check_unique") { | ||
dots <- capture_dots(data, ..., .j = FALSE) | ||
dots <- dots[!vapply(dots, is_null, logical(1))] | ||
if (length(dots) == 0) { | ||
return(data) | ||
} | ||
|
||
named_dots <- have_name(dots) | ||
if (any(!named_dots)) { | ||
# Auto-names generated by enquos() don't always work with the CJ() step | ||
## Ex: `1:3` | ||
# Replicates the "V" naming convention data.table uses | ||
symbol_dots <- vapply(dots, is_symbol, logical(1)) | ||
needs_v_name <- !symbol_dots & !named_dots | ||
v_names <- paste0("V", 1:length(dots)) | ||
names(dots)[needs_v_name] <- v_names[needs_v_name] | ||
names(dots)[symbol_dots] <- lapply(dots[symbol_dots], as_name) | ||
} | ||
names(dots) <- vctrs::vec_as_names(names(dots), repair = .name_repair) | ||
|
||
on <- names(dots) | ||
cj <- expr(CJ(!!!syms(on), unique = TRUE)) | ||
|
||
out <- distinct(data, !!!syms(data$groups), !!!dots) | ||
if (length(data$groups) == 0) { | ||
out <- step_subset(out, i = cj, on = on) | ||
} else { | ||
on <- call2(".", !!!syms(on)) | ||
out <- step_subset(out, j = expr(.SD[!!cj, on = !!on])) | ||
} | ||
|
||
out | ||
} | ||
|
||
# exported onLoad | ||
expand.data.table <- function(data, ..., .name_repair = "check_unique") { | ||
data <- lazy_dt(data) | ||
tidyr::expand(data, ..., .name_repair = .name_repair) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
test_that("complete with no variables returns data as is", { | ||
mtcars_dt <- lazy_dt(mtcars, "DT") | ||
expect_equal(complete(mtcars_dt), mtcars_dt) | ||
}) | ||
|
||
test_that("basic invocation works", { | ||
tbl <- tibble(x = 1:2, y = 1:2, z = 3:4) | ||
dt <- lazy_dt(tbl, "DT") | ||
out <- dt %>% complete(x, y) %>% collect() | ||
|
||
expect_equal(nrow(out), 4) | ||
expect_equal(out$z, c(3, NA, NA, 4)) | ||
}) | ||
|
||
test_that("empty expansion returns original", { | ||
tbl <- tibble(x = character()) | ||
dt <- lazy_dt(tbl, "DT") | ||
out <- dt %>% complete(y = NULL) %>% collect() | ||
expect_equal(out, tbl) | ||
|
||
tbl <- tibble(x = 1:4) | ||
dt <- lazy_dt(tbl, "DT") | ||
out <- dt %>% complete(y = NULL) %>% collect() | ||
expect_equal(out, tbl) | ||
}) |
Oops, something went wrong.