diff --git a/DESCRIPTION b/DESCRIPTION index 21587d9de..71cdac8bc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,8 +11,8 @@ Description: This implements the data table back-end for 'dplyr' so that you License: GPL (>= 2) Imports: dplyr (>= 0.5.0), + tidyselect, data.table (>= 1.9.6), - lazyeval, rlang Suggests: Lahman, @@ -21,5 +21,6 @@ Suggests: covr LazyData: true RoxygenNote: 6.1.1 +Encoding: UTF-8 URL: https://github.com/hadley/dtplyr BugReports: https://github.com/hadley/dtplyr/issues diff --git a/NAMESPACE b/NAMESPACE index 25c356826..9c4121be1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,21 +25,25 @@ export(src_dt) export(tbl_dt) import(rlang) importFrom(data.table,as.data.table) -importFrom(dplyr,arrange_) +importFrom(dplyr,arrange) importFrom(dplyr,as.tbl) importFrom(dplyr,auto_copy) -importFrom(dplyr,distinct_) -importFrom(dplyr,do_) -importFrom(dplyr,filter_) -importFrom(dplyr,group_by_) +importFrom(dplyr,distinct) +importFrom(dplyr,do) +importFrom(dplyr,filter) +importFrom(dplyr,group_by) +importFrom(dplyr,group_by_prepare) +importFrom(dplyr,group_vars) importFrom(dplyr,groups) -importFrom(dplyr,mutate_) -importFrom(dplyr,rename_) +importFrom(dplyr,mutate) +importFrom(dplyr,rename) importFrom(dplyr,same_src) -importFrom(dplyr,select_) -importFrom(dplyr,slice_) -importFrom(dplyr,summarise_) +importFrom(dplyr,select) +importFrom(dplyr,slice) +importFrom(dplyr,summarise) importFrom(dplyr,tbl_vars) importFrom(dplyr,ungroup) +importFrom(tidyselect,vars_rename) +importFrom(tidyselect,vars_select) importFrom(utils,head) importFrom(utils,tail) diff --git a/NEWS.md b/NEWS.md index 83fdc6ddc..834721bbb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,13 @@ - # dtplyr 0.0.3.9000 +* Convert from lazyeval to tidy eval (@christophsax). + +* `ungroup()` now copies it's input (@christophsax, #54). + +* Scoped verbs now work (@christophsax, #43, #51). + +* `mutate()` preserves grouping (@christophsax, #17). + # dtplyr 0.0.3 - Maintenance release for CRAN checks. diff --git a/R/compat-dplyr-0.6.0.R b/R/compat-dplyr-0.6.0.R index c1e61eaf6..ddca39862 100644 --- a/R/compat-dplyr-0.6.0.R +++ b/R/compat-dplyr-0.6.0.R @@ -13,8 +13,8 @@ register_s3_method("dplyr", "slice", "data.table") } - register_s3_method("dplyr", "do_", "data.table") - register_s3_method("dplyr", "do_", "tbl_dt") + register_s3_method("dplyr", "do", "data.table") + register_s3_method("dplyr", "do", "tbl_dt") register_s3_method("base", "print", "grouped_dt") register_s3_method("dplyr", "groups", "grouped_dt") @@ -22,9 +22,9 @@ register_s3_method("dplyr", "n_groups", "grouped_dt") register_s3_method("dplyr", "ungroup", "grouped_dt") - register_s3_method("dplyr", "group_by_", "data.table") - register_s3_method("dplyr", "do_", "grouped_dt") - register_s3_method("dplyr", "distinct_", "grouped_dt") + register_s3_method("dplyr", "group_by", "data.table") + register_s3_method("dplyr", "do", "grouped_dt") + register_s3_method("dplyr", "distinct", "grouped_dt") register_s3_method("dplyr", "inner_join", "data.table") register_s3_method("dplyr", "left_join", "data.table") @@ -38,36 +38,36 @@ register_s3_method("dplyr", "sample_frac", "tbl_dt") register_s3_method("dplyr", "sample_frac", "grouped_dt") - register_s3_method("dplyr", "distinct_", "data.table") - register_s3_method("dplyr", "distinct_", "tbl_dt") + register_s3_method("dplyr", "distinct", "data.table") + register_s3_method("dplyr", "distinct", "tbl_dt") - register_s3_method("dplyr", "filter_", "grouped_dt") - register_s3_method("dplyr", "filter_", "tbl_dt") - register_s3_method("dplyr", "filter_", "data.table") + register_s3_method("dplyr", "filter", "grouped_dt") + register_s3_method("dplyr", "filter", "tbl_dt") + register_s3_method("dplyr", "filter", "data.table") - register_s3_method("dplyr", "summarise_", "grouped_dt") - register_s3_method("dplyr", "summarise_", "tbl_dt") - register_s3_method("dplyr", "summarise_", "data.table") + register_s3_method("dplyr", "summarise", "grouped_dt") + register_s3_method("dplyr", "summarise", "tbl_dt") + register_s3_method("dplyr", "summarise", "data.table") - register_s3_method("dplyr", "mutate_", "grouped_dt") - register_s3_method("dplyr", "mutate_", "tbl_dt") - register_s3_method("dplyr", "mutate_", "data.table") + register_s3_method("dplyr", "mutate", "grouped_dt") + register_s3_method("dplyr", "mutate", "tbl_dt") + register_s3_method("dplyr", "mutate", "data.table") - register_s3_method("dplyr", "arrange_", "grouped_dt") - register_s3_method("dplyr", "arrange_", "tbl_dt") - register_s3_method("dplyr", "arrange_", "data.table") + register_s3_method("dplyr", "arrange", "grouped_dt") + register_s3_method("dplyr", "arrange", "tbl_dt") + register_s3_method("dplyr", "arrange", "data.table") - register_s3_method("dplyr", "select_", "grouped_dt") - register_s3_method("dplyr", "select_", "tbl_dt") - register_s3_method("dplyr", "select_", "data.table") + register_s3_method("dplyr", "select", "grouped_dt") + register_s3_method("dplyr", "select", "tbl_dt") + register_s3_method("dplyr", "select", "data.table") - register_s3_method("dplyr", "rename_", "grouped_dt") - register_s3_method("dplyr", "rename_", "tbl_dt") - register_s3_method("dplyr", "rename_", "data.table") + register_s3_method("dplyr", "rename", "grouped_dt") + register_s3_method("dplyr", "rename", "tbl_dt") + register_s3_method("dplyr", "rename", "data.table") - register_s3_method("dplyr", "slice_", "grouped_dt") - register_s3_method("dplyr", "slice_", "tbl_dt") - register_s3_method("dplyr", "slice_", "data.table") + register_s3_method("dplyr", "slice", "grouped_dt") + register_s3_method("dplyr", "slice", "tbl_dt") + register_s3_method("dplyr", "slice", "data.table") invisible() } diff --git a/R/do.R b/R/do.R index 5955cbb61..cfd9ec313 100644 --- a/R/do.R +++ b/R/do.R @@ -3,16 +3,12 @@ #' importFrom(dplyr,do) #' S3method(do,data.table) #' } +#' @importFrom dplyr do do.data.table <- function(.data, ...) { - do_(.data, .dots = lazyeval::lazy_dots(...)) -} - -#' @importFrom dplyr do_ -do_.data.table <- function(.data, ..., .dots) { - out <- do_(as.data.frame(.data), ..., .dots = .dots) + out <- do(as.data.frame(.data), !!! quos(...)) data.table::as.data.table(out) } -do_.tbl_dt <- function(.data, ..., .dots) { - out <- do_(as.data.frame(.data), ..., .dots = .dots) +do.tbl_dt <- function(.data, ...) { + out <- do(as.data.frame(.data), !!! quos(...)) tbl_dt(out) } diff --git a/R/grouped-dt.r b/R/grouped-dt.r index 5750a183c..bd725fe98 100644 --- a/R/grouped-dt.r +++ b/R/grouped-dt.r @@ -64,34 +64,35 @@ n_groups.grouped_dt <- function(x) { nrow(dt_subset(x, , quote(list(1)))) } -#' @importFrom dplyr group_by_ +#' @importFrom dplyr group_by group_by_prepare group_vars group_by.data.table <- function(.data, ..., add = FALSE) { - group_by_(.data, .dots = lazyeval::lazy_dots(...), add = add) -} -group_by_.data.table <- function(.data, ..., .dots, add = FALSE) { - groups <- dplyr::group_by_prepare(.data, ..., .dots = .dots, add = add) + groups <- group_by_prepare(.data, ..., add = add) grouped_dt(groups$data, groups$groups) + # groups <- dplyr::group_by_prepare(.data, ..., .dots = .dots, add = add) + # grouped_dt(groups$data, groups$groups) } ungroup.grouped_dt <- function(x, ...) { - data.table::setattr(x, "vars", NULL) - data.table::setattr(x, "class", setdiff(class(x), "grouped_dt")) - x + z <- data.table::copy(x) + data.table::setattr(z, "vars", NULL) + data.table::setattr(z, "class", setdiff(class(z), "grouped_dt")) + z } # Do --------------------------------------------------------------------------- -do_.grouped_dt <- function(.data, ..., .dots) { - args <- lazyeval::all_dots(.dots, ...) - env <- lazyeval::common_env(args) - named <- named_args(args) +do.grouped_dt <- function(.data, ...) { + + dots <- quos(...) + env <- common_env(dots) + named <- named_args(dots) if (!named) { - j <- args[[1]]$expr + j <- get_expr(dots[[1]]) } else { - args <- lapply(args, function(x) call("list", x$expr)) - j <- as.call(c(quote(list), args)) + exprs <- lapply(dots, function(x) call("list", get_expr(x))) + j <- as.call(c(quote(list), exprs)) } out <- dt_subset(.data, , j, env = env, sd_cols = names(.data)) @@ -125,9 +126,9 @@ named_args <- function(args) { # Set operations --------------------------------------------------------------- -distinct_.grouped_dt <- function(.data, ..., .dots) { - groups <- lazyeval::as.lazy_dots(groups(.data)) - dist <- distinct_vars(.data, ..., .dots = c(.dots, groups)) +distinct.grouped_dt <- function(.data, ...) { + + dist <- distinct_vars(.data, quos(!!!groups(.data), ..., .named = TRUE)) grouped_dt(unique(dist$data, by = dist$vars), groups(.data), copy = FALSE) } diff --git a/R/sets.R b/R/sets.R index 9727b5ecc..d34a89c2a 100644 --- a/R/sets.R +++ b/R/sets.R @@ -1,13 +1,9 @@ # Set operations --------------------------------------------------------------- -distinct.data.table <- function(.data, ..., .keep_all = FALSE) { - distinct_(.data, .dots = lazyeval::lazy_dots(...), .keep_all = .keep_all) -} - -#' @importFrom dplyr distinct_ -distinct_.data.table <- function(.data, ..., .dots, .keep_all = FALSE) { - dist <- distinct_vars(.data, ..., .dots = .dots) +#' @importFrom dplyr distinct +distinct.data.table <- function(.data, ..., .dots, .keep_all = FALSE) { + dist <- distinct_vars(.data, quos(..., .named = TRUE), .keep_all = .keep_all) if (length(dist$vars) == 0) { res <- unique(dist$data, by = NULL) @@ -21,21 +17,38 @@ distinct_.data.table <- function(.data, ..., .dots, .keep_all = FALSE) { res } -distinct_.tbl_dt <- function(.data, ..., .dots) { +distinct.tbl_dt <- function(.data, ...) { tbl_dt(NextMethod(), copy = FALSE) } -distinct_vars <- function(.data, ..., .dots) { - dots <- lazyeval::all_dots(.dots, ..., all_named = TRUE) +# unexported from dplyr, removed purrr dependency +distinct_vars <- function(.data, vars, group_vars = character(), .keep_all = FALSE) { + stopifnot(is_quosures(vars), is.character(group_vars)) + + # If no input, keep all variables + if (length(vars) == 0) { + return(list( + data = .data, + vars = names(.data), + keep = names(.data) + )) + } # If any calls, use mutate to add new columns, then distinct on those - needs_mutate <- vapply(dots, function(x) !is.name(x$expr), logical(1)) + needs_mutate <- vapply(vars, quo_is_lang, TRUE) if (any(needs_mutate)) { - .data <- mutate_(.data, .dots = dots[needs_mutate]) + .data <- mutate(.data, !!! vars[needs_mutate]) } # Once we've done the mutate, we no longer need lazy objects, and # can instead just use their names - list(data = .data, vars = names(dots)) -} + out_vars <- intersect(names(.data), c(names(vars), group_vars)) + if (.keep_all) { + keep <- names(.data) + } else { + keep <- unique(out_vars) + } + + list(data = .data, vars = out_vars, keep = keep) +} diff --git a/R/tbl-dt.r b/R/tbl-dt.r index edad2593b..94043fb18 100644 --- a/R/tbl-dt.r +++ b/R/tbl-dt.r @@ -161,23 +161,77 @@ and_expr <- function(exprs) { # The S3 method is registered manually in .onLoad() to avoid an R CMD # check warning -filter.data.table <- function(.data, ...) { - filter_(.data, .dots = lazyeval::lazy_dots(...)) +# Is there something similar in rlang? + +# first version, adapted from lazyeval +# common_env <- function (dots){ +# if (length(dots) == 0) +# return(baseenv()) +# env <- get_env(dots[[1]]) +# if (length(dots) == 1) +# return(env) +# for (i in 2:length(dots)) { +# if (!identical(env, get_env(dots[[i]]))) { +# return(baseenv()) +# } +# } +# env +# } + +# but this still does not take care of: + +# a) Literals. We want to fall back to baseenv() if we dont have a unique env +# that is not emptyenv(). + +# b) Nested environments. If a function is called within a function, we want to +# evaluate the data.table call in the same environments. Some quosures may have +# an environment that is a parent of this environment, so we need to figure +# out the 'deepest' environment. We also need to check that all other +# environments are parents of this environment. + +# third version +common_env <- function (dots){ + stopifnot(inherits(dots, "quosures")) + if (length(dots) == 0) return(baseenv()) + if (length(dots) == 1){ + env <- get_env(dots[[1]]) + } else { + # evaluate common env + envs <- lapply(dots, get_env) + + # use deepest env as a candidate for common env + env <- envs[[which.max(vapply(envs, env_depth, 0L))]] + # fail if some environments are not in search path + if (!all(envs %in% c(env_parents(env), env))){ + stop("Conflicting environment tree. Does this ever arise?") + } + } + + # from ?quosure: + # Literals are enquosed with the empty environment because they can + # be evaluated anywhere. + # But we don't have `[` in emptyenv, which we need in the data.table call. + # Changing to baseenv which was also returned in lazyeval::all_dots() + if (identical(env, emptyenv())){ + env <- baseenv() + } + env } -#' @importFrom dplyr filter_ -filter_.grouped_dt <- function(.data, ..., .dots) { +#' @importFrom dplyr filter +filter.grouped_dt <- function(.data, ...) { grouped_dt(NextMethod(), groups(.data), copy = FALSE) } -filter_.tbl_dt <- function(.data, ..., .dots) { +filter.tbl_dt <- function(.data, ...) { tbl_dt(NextMethod(), copy = FALSE) } -filter_.data.table <- function(.data, ..., .dots) { - dots <- lazyeval::all_dots(.dots, ...) - env <- lazyeval::common_env(dots) +filter.data.table <- function(.data, ...) { + dots <- quos(...) + env <- common_env(dots) # http://stackoverflow.com/questions/16573995/subset-by-group-with-data-table - expr <- lapply(dots, `[[`, "expr") + # expr <- lapply(dots, `[[`, "expr") + expr <- lapply(dots, get_expr) j <- substitute(list(`_row` = .I[expr]), list(expr = and_expr(expr))) indices <- dt_subset(.data, , j, env)$`_row` @@ -186,48 +240,48 @@ filter_.data.table <- function(.data, ..., .dots) { # Summarise -------------------------------------------------------------------- -summarise.data.table <- function(.data, ...) { - summarise_(.data, .dots = lazyeval::lazy_dots(...)) -} - -#' @importFrom dplyr summarise_ -summarise_.grouped_dt <- function(.data, ..., .dots) { +#' @importFrom dplyr summarise +summarise.grouped_dt <- function(.data, ...) { grouped_dt(NextMethod(), drop_last(groups(.data)), copy = FALSE) } -summarise_.tbl_dt <- function(.data, ..., .dots) { +summarise.tbl_dt <- function(.data, ...) { tbl_dt(NextMethod(), copy = FALSE) } -summarise_.data.table <- function(.data, ..., .dots) { - dots <- lazyeval::all_dots(.dots, ..., all_named = TRUE) +summarise.data.table <- function(.data, ...) { + dots <- quos(..., .named = TRUE) - j <- lazyeval::make_call(quote(list), dots) - dt_subset(.data, , j$expr, env = j$env) -} + env <- common_env(dots) + exprs <- lapply(dots, get_expr) -# Mutate ----------------------------------------------------------------------- + j <- as.call(c(quote(list), exprs)) -mutate.data.table <- function(.data, ...) { - mutate_(.data, .dots = lazyeval::lazy_dots(...)) + dt_subset(.data, , j, env = env) } -#' @importFrom dplyr mutate_ -mutate_.grouped_dt <- function(.data, ..., .dots) { - grouped_dt(NextMethod(), drop_last(groups(.data)), copy = FALSE) +# Mutate ----------------------------------------------------------------------- + +mutate.grouped_dt <- function(.data, ...) { + grouped_dt(NextMethod(), groups(.data), copy = FALSE) } -mutate_.tbl_dt <- function(.data, ..., .dots) { +mutate.tbl_dt <- function(.data, ...) { tbl_dt(NextMethod(), copy = FALSE) } -mutate_.data.table <- function(.data, ..., .dots) { - dots <- lazyeval::all_dots(.dots, ..., all_named = TRUE) - names <- lapply(names(dots), as.name) + +#' @importFrom dplyr mutate +mutate.data.table <- function(.data, ...) { + dots <- quos(..., .named = TRUE) + names <- names(dots) # Never want to modify in place .data <- data.table::copy(.data) for(i in seq_along(dots)) { # For each new variable, generate a call of the form df[, new := expr] - j <- substitute(lhs := rhs, list(lhs = names[[i]], rhs = dots[[i]]$expr)) - .data <- dt_subset(.data, , j, dots[[i]]$env) + j <- substitute(lhs := rhs, list(lhs = names[[i]], rhs = get_expr(dots[[i]]))) + + env <- common_env(dots[i]) + + .data <- dt_subset(.data, , j, env) } # Need to use this syntax to make the output visible (#11). @@ -236,101 +290,124 @@ mutate_.data.table <- function(.data, ..., .dots) { # Arrange ---------------------------------------------------------------------- -arrange.data.table <- function(.data, ...) { - arrange_(.data, .dots = lazyeval::lazy_dots(...)) -} +#' @importFrom dplyr arrange +arrange.grouped_dt <- function(.data, ..., .by_group = FALSE) { + if (.by_group) { + dots <- quos(!!!groups(.data), ...) + } else { + dots <- quos(...) + } -#' @importFrom dplyr arrange_ -arrange_.grouped_dt <- function(.data, ..., .dots) { - grouped_dt(NextMethod(), groups(.data), copy = FALSE) + arrange_impl(.data, dots) } -arrange_.tbl_dt <- function(.data, ..., .dots) { +arrange.tbl_dt <- function(.data, ...) { tbl_dt(NextMethod(), copy = FALSE) } -arrange_.data.table <- function(.data, ..., .dots) { - dots <- lazyeval::all_dots(.dots, ...) +arrange.data.table <- function(.data, ...) { + dots <- quos(...) + + arrange_impl(.data, dots) +} + +arrange_impl <- function(.data, dots) { + exprs <- lapply(dots, get_expr) + env <- common_env(dots) - groups <- lazyeval::as.lazy_dots(groups(.data), - env = lazyeval::common_env(dots)) - i <- lazyeval::make_call(quote(order), c(groups, dots)) + i <- as.call(c(quote(order), exprs)) - dt_subset(.data, i$expr, , env = i$env) + dt_subset(.data, i, , env = env) } # Select ----------------------------------------------------------------------- -select.data.table <- function(.data, ...) { - select_(.data, .dots = lazyeval::lazy_dots(...)) +# not exported from dplyr +ensure_group_vars <- function(vars, data, notify = TRUE) { + group_names <- group_vars(data) + missing <- setdiff(group_names, vars) + + if (length(missing) > 0) { + if (notify) { + inform(paste0("Adding missing grouping variables: ", + "`", missing, "`", collapse = ", ") + ) + } + vars <- c(set_names(missing, missing), vars) + } + + vars } -#' @importFrom dplyr select_ -select_.grouped_dt <- function(.data, ..., .dots) { - dots <- lazyeval::all_dots(.dots, ...) - vars <- dplyr::select_vars_(names(.data), dots, - include = as.character(groups(.data))) + +#' @importFrom dplyr select +#' @importFrom tidyselect vars_select vars_rename +select.grouped_dt <- function(.data, ...) { + vars <- tidyselect::vars_select(names(.data), !!! quos(...)) + vars <- ensure_group_vars(vars, .data) + out <- .data[, vars, drop = FALSE, with = FALSE] - data.table::setnames(out, names(vars)) + + if (nrow(out) > 0) data.table::setnames(out, names(vars)) grouped_dt(out, groups(.data), copy = FALSE) + } -select_.data.table <- function(.data, ..., .dots) { - dots <- lazyeval::all_dots(.dots, ...) - vars <- dplyr::select_vars_(names(.data), dots) +select.data.table <- function(.data, ...) { + vars <- tidyselect::vars_select(names(.data), !!! quos(...)) out <- .data[, vars, drop = FALSE, with = FALSE] - data.table::setnames(out, names(vars)) + + if (nrow(out) > 0) data.table::setnames(out, names(vars)) + out } -select_.tbl_dt <- function(.data, ..., .dots) { +select.tbl_dt <- function(.data, ...) { tbl_dt(NextMethod(), copy = FALSE) } # Rename ----------------------------------------------------------------------- -rename.data.table <- function(.data, ...) { - rename_(.data, .dots = lazyeval::lazy_dots(...)) -} - -#' @importFrom dplyr rename_ -rename_.grouped_dt <- function(.data, ..., .dots) { - dots <- lazyeval::all_dots(.dots, ...) - vars <- dplyr::rename_vars_(names(.data), dots) +#' @importFrom dplyr rename +rename.grouped_dt <- function(.data, ...) { + vars <- tidyselect::vars_rename(names(.data), !!! quos(...)) out <- .data[, vars, drop = FALSE, with = FALSE] - data.table::setnames(out, names(vars)) + + if (nrow(out) > 0) data.table::setnames(out, names(vars)) grouped_dt(out, groups(.data), copy = FALSE) } -rename_.data.table <- function(.data, ..., .dots) { - dots <- lazyeval::all_dots(.dots, ...) - vars <- dplyr::rename_vars_(names(.data), dots) +rename.data.table <- function(.data, ...) { + vars <- tidyselect::vars_rename(names(.data), !!! quos(...)) out <- .data[, vars, drop = FALSE, with = FALSE] - data.table::setnames(out, names(vars)) + + if (nrow(out) > 0) data.table::setnames(out, names(vars)) + out } -rename_.tbl_dt <- function(.data, ..., .dots) { +rename.tbl_dt <- function(.data, ...) { tbl_dt(NextMethod(), copy = FALSE) } -# Slice ------------------------------------------------------------------- +# Slice ------------------------------------------------------------------------ -slice.data.table <- function(.data, ...) { - slice_(.data, .dots = lazyeval::lazy_dots(...)) -} - -#' @importFrom dplyr slice_ -slice_.grouped_dt <- function(.data, ..., .dots) { +#' @importFrom dplyr slice +slice.grouped_dt <- function(.data, ...) { grouped_dt(NextMethod(), groups(.data), copy = FALSE) } -slice_.tbl_dt <- function(.data, ..., .dots) { +slice.tbl_dt <- function(.data, ...) { tbl_dt(NextMethod(), copy = FALSE) } -slice_.data.table <- function(.data, ..., .dots) { - dots <- lazyeval::all_dots(.dots, ..., all_named = TRUE) - env <- lazyeval::common_env(dots) +slice.data.table <- function(.data, ...) { + + dots <- quos(...) + env <- common_env(dots) + + exprs <- lapply(dots, get_expr) + + i <- as.call(c(quote(c), exprs)) - j <- substitute(.SD[rows], list(rows = dots[[1]]$expr)) + j <- substitute(.SD[rows], list(rows = i)) dt_subset(.data, , j, env) } diff --git a/R/utils.R b/R/utils.R index 7635e58bb..7d0a7fefc 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,5 +1,3 @@ -# j <- lazyeval::interp(~ .I[rows], rows = args[[1]]$expr) -# dt_subset(dt, , j, env) dt_subset <- function(dt, i, j, env = parent.frame(), sd_cols = NULL) { env <- new.env(parent = env, size = 2L) @@ -18,9 +16,8 @@ dt_subset <- function(dt, i, j, env = parent.frame(), sd_cols = NULL) { call <- substitute(`_dt`[i, j, by = `_vars`], args) call$.SDcols = sd_cols } - # print(call) - eval(call, env) + eval_bare(call, env) } dt_replace <- function(x) { diff --git a/tests/testthat/helpers-skip.R b/tests/testthat/helpers-skip.R new file mode 100644 index 000000000..053b2bd89 --- /dev/null +++ b/tests/testthat/helpers-skip.R @@ -0,0 +1,5 @@ +skip_if_dtplyr <- function(){ + if (TRUE) { + skip("Not applicable to dtplyr.") + } +} diff --git a/tests/testthat/test-arrange.R b/tests/testthat/test-arrange.R index fde4cb949..d2256bb9c 100644 --- a/tests/testthat/test-arrange.R +++ b/tests/testthat/test-arrange.R @@ -30,3 +30,210 @@ test_that("arrange results agree with data same regardless of backend", { compare_tbls(tbls, function(x) x %>% arrange(desc(c), id), compare = equal_df) compare_tbls(tbls, function(x) x %>% arrange(desc(d), id), compare = equal_df) }) + + + +context("arrange (tests from dplyr)") + +df2 <- data.table( + a = rep(c(NA, 1, 2, 3), each = 4), + b = rep(c(0L, NA, 1L, 2L), 4), + c = c(NA, NA, NA, NA, letters[10:21]), + d = rep(c(T, NA, F, T), each = 4), + id = 1:16, + stringsAsFactors = FALSE +) + +equal_df <- function(x, y) { + rownames(x) <- NULL + rownames(y) <- NULL + isTRUE(all.equal(x, y)) +} + +test_that("local arrange sorts missing values to end", { + na_last <- function(x) { + n <- length(x) + all(is.na(x[(n - 3):n])) + } + + # Numeric + expect_true(na_last(arrange(df2, a)$a)) + expect_true(na_last(arrange(df2, desc(a))$a)) + + # Integer + expect_true(na_last(arrange(df2, b)$b)) + expect_true(na_last(arrange(df2, desc(b))$b)) + + # Character + expect_true(na_last(arrange(df2, c)$c)) + expect_true(na_last(arrange(df2, desc(c))$c)) + + # Logical + expect_true(na_last(arrange(df2, d)$d)) + expect_true(na_last(arrange(df2, desc(d))$d)) +}) + +test_that("two arranges equivalent to one", { + df <- tribble( + ~x, ~y, + 2, 1, + 2, -1, + 1, 1 + ) + + df1 <- df %>% arrange(x, y) + df2 <- df %>% arrange(y) %>% arrange(x) + + expect_equal(df1, df2) +}) + +test_that("arrange handles list columns (#282)", { + df <- data.table(a = 2:1) + df$b <- list("foo", "bar") + res <- arrange(df, a) + expect_equal(res$b, list("bar", "foo")) +}) + +test_that("arrange handles the case where ... is missing (#338)", { + expect_equivalent(arrange(mtcars), mtcars) +}) + +# test_that("arrange handles 0-rows data frames", { +# d <- data.table(a = numeric(0)) +# expect_equal(d, arrange(d)) +# }) + +test_that("grouped arrange ignores group (#491 -> #1206)", { + df <- data.table(g = c(2, 1, 2, 1), x = c(4:1)) + + out <- df %>% group_by(g) %>% arrange(x) + expect_equal(out$x, 1:4) +}) + +test_that("arrange keeps the grouping structure (#605)", { + skip_if_dtplyr() + dat <- data.table(g = c(2, 2, 1, 1), x = c(1, 3, 2, 4)) + res <- dat %>% group_by(g) %>% arrange() + expect_is(res, "grouped_df") + expect_equal(res$x, dat$x) + + res <- dat %>% group_by(g) %>% arrange(x) + expect_is(res, "grouped_df") + expect_equal(res$x, 1:4) + expect_equal(attr(res, "indices"), list(c(1, 3), c(0, 2))) +}) + +test_that("arrange handles complex vectors", { + skip_if_dtplyr() + + d <- data.table(x = 1:10, y = 10:1 + 2i) + res <- arrange(d, y) + expect_equal(res$y, rev(d$y)) + expect_equal(res$x, rev(d$x)) + + res <- arrange(res, desc(y)) + expect_equal(res$y, d$y) + expect_equal(res$x, d$x) + + d$y[c(3, 6)] <- NA + res <- arrange(d, y) + expect_true(all(is.na(res$y[9:10]))) + + res <- arrange(d, desc(y)) + expect_true(all(is.na(res$y[9:10]))) + +}) + +test_that("arrange respects attributes #1105", { + env <- environment() + Period <- suppressWarnings(setClass("Period", contains = "numeric", where = env)) + on.exit(removeClass("Period", where = env)) + + df <- data.table(p = Period(c(1, 2, 3)), x = 1:3) + res <- arrange(df, p) + expect_is(res$p, "Period") +}) + +test_that("arrange works with empty data frame (#1142)", { + skip_if_dtplyr() + + df <- data.table() + res <- df %>% arrange + expect_equal(nrow(res), 0L) + expect_equal(length(res), 0L) +}) + +test_that("arrange respects locale (#1280)", { + df2 <- tibble(words = c("casa", "\u00e1rbol", "zona", "\u00f3rgano")) + + res <- df2 %>% arrange(words) + expect_equal(res$words, sort(df2$words)) + + res <- df2 %>% arrange(desc(words)) + expect_equal(res$words, sort(df2$words, decreasing = TRUE)) + +}) + +test_that("duplicated column name is explicit about which column (#996)", { + skip_if_dtplyr() + + df <- data.table(x = 1:10, x = 1:10) + names(df) <- c("x", "x") + + # Error message created by tibble + expect_error(df %>% arrange) + + df <- data.table(x = 1:10, x = 1:10, y = 1:10, y = 1:10) + names(df) <- c("x", "x", "y", "y") + + # Error message created by tibble + expect_error(df %>% arrange) +}) + +test_that("arrange fails gracefully on list columns (#1489)", { + df <- expand.grid(group = 1:2, y = 1, x = 1) %>% + group_by(group) %>% + do(fit = lm(data = ., y ~ x)) + expect_error( + arrange(df, fit), + "Argument 1 is of unsupported type list", + fixed = TRUE + ) +}) + +test_that("arrange fails gracefully on raw columns (#1803)", { + skip_if_dtplyr() + df <- data.table(a = 1:3, b = as.raw(1:3)) + expect_error( + arrange(df, a), + "Column `b` is of unsupported type raw", + fixed = TRUE + ) + expect_error( + arrange(df, b), + "Column `b` is of unsupported type raw", + fixed = TRUE + ) +}) + +test_that("arrange fails gracefully on matrix input (#1870)", { + df <- tibble(a = 1:3, b = 4:6) + expect_error( + arrange(df, is.na(df)), + "Argument 1 is of unsupported type matrix", + fixed = TRUE + ) +}) + + +# grouped_df -------------------------------------------------------------- + +test_that("can choose to inclue grouping vars", { + df <- data.table(g = c(1, 2), x = c(2, 1)) %>% group_by(g) + + df1 <- df %>% arrange(x, .by_group = TRUE) + df2 <- df %>% arrange(g, x) + + expect_equal(df1, df2) +}) + diff --git a/tests/testthat/test-distinct.R b/tests/testthat/test-distinct.R index 0cfb201ed..77f493761 100644 --- a/tests/testthat/test-distinct.R +++ b/tests/testthat/test-distinct.R @@ -33,3 +33,88 @@ test_that("distinct works when key is set", { res <- distinct(dt, x) expect_equal(nrow(res), 1) }) + + +context("Distinct (tests from dplyr):") + +test_that("distinct equivalent to local unique when keep_all is TRUE", { + df <- data.table( + x = c(1, 1, 1, 1), + y = c(1, 1, 2, 2), + z = c(1, 2, 1, 2) + ) + + expect_equal(distinct(df), unique(df)) +}) + +test_that("distinct for single column works as expected (#1937)", { + df <- data.table( + x = c(1, 1, 1, 1), + y = c(1, 1, 2, 2), + z = c(1, 2, 1, 2) + ) + + expect_equal(distinct(df, x, .keep_all = FALSE), unique(df[, .(x)])) + expect_equal(distinct(df, y, .keep_all = FALSE), unique(df[, .(y)])) +}) + +test_that("distinct works for 0-sized columns (#1437)", { + df <- tibble(x = 1:10) %>% select(-x) + ddf <- distinct(df) + expect_equal(ncol(ddf), 0L) +}) + +test_that("if no variables specified, uses all", { + df <- tibble(x = c(1, 1), y = c(2, 2)) + expect_equal(distinct(df), tibble(x = 1, y = 2)) +}) + +test_that("distinct keeps only specified cols", { + df <- tibble(x = c(1, 1, 1), y = c(1, 1, 1)) + expect_equal(df %>% distinct(x), tibble(x = 1)) +}) + +test_that("unless .keep_all = TRUE", { + df <- tibble(x = c(1, 1, 1), y = 3:1) + + expect_equal(df %>% distinct(x), tibble(x = 1)) + expect_equal(df %>% distinct(x, .keep_all = TRUE), tibble(x = 1, y = 3L)) +}) + +test_that("distinct doesn't duplicate columns", { + skip_if_dtplyr() + + df <- data.table(a = 1:3, b = 4:6) + + expect_named(df %>% distinct(a, a), "a") + expect_named(df %>% group_by(a) %>% distinct(a), "a") +}) + +test_that("grouped distinct always includes group cols", { + skip_if_dtplyr() + + df <- data.table(g = c(1, 2), x = c(1, 2)) + + out <- df %>% group_by(g) %>% distinct(x) + expect_equal(df, out) +}) + +test_that("empty grouped distinct equivalent to empty ungrouped", { + df <- data.table(g = c(1, 2), x = c(1, 2)) + + df1 <- df %>% distinct() %>% group_by(g) + df2 <- df %>% group_by(g) %>% distinct() + + expect_equal(df1, df2) +}) + +test_that("distinct on a new, mutated variable is equivalent to mutate followed by distinct", { + df <- data.table(g = c(1, 2), x = c(1, 2)) + + df1 <- df %>% distinct(aa = g * 2) + df2 <- df %>% mutate(aa = g * 2) %>% distinct(aa) + + expect_equal(df1, df2) +}) + + diff --git a/tests/testthat/test-do.R b/tests/testthat/test-do.R index d32b559e6..293b062d9 100644 --- a/tests/testthat/test-do.R +++ b/tests/testthat/test-do.R @@ -72,3 +72,171 @@ test_that("grouped_dt passes all columns", { expect_equal(out$n[[1]], c("mpg", "cyl")) }) + + +context("do (tests from dplyr):") + +# Grouped data frames ---------------------------------------------------------- + +df <- data.table( + g = c(1, 2, 2, 3, 3, 3), + x = 1:6, + y = 6:1 +) %>% group_by(g) + +test_that("can't use both named and unnamed args", { + expect_error( + df %>% do(x = 1, 2), + "Arguments to do() must either be all named or all unnamed", + fixed = TRUE + ) +}) + +test_that("unnamed elements must return data frames", { + skip_if_dtplyr() + + expect_error( + df %>% ungroup %>% do(1), "Result must be a data frame, not numeric") + expect_error( + df %>% do(1), "Results 1, 2, 3 must be data frames, not numeric") + expect_error( + df %>% do("a"), "Results 1, 2, 3 must be data frames, not character") +}) + +test_that("unnamed results bound together by row", { + first <- df %>% do(head(., 1)) + + expect_equal(nrow(first), 3) + expect_equal(first$g, 1:3) + expect_equal(first$x, c(1, 2, 4)) +}) + +test_that("can only use single unnamed argument", { + expect_error( + df %>% do(head, tail), + "Can only supply single unnamed argument" + ) +}) + +test_that("named argument become list columns", { + out <- df %>% do(nrow = nrow(.), ncol = ncol(.)) + expect_equal(out$nrow, list(1, 2, 3)) + # includes grouping columns + expect_equal(out$ncol, list(3, 3, 3)) +}) + +test_that("colums in output override columns in input", { + skip_if_dtplyr() + + out <- df %>% do(data.table(g = 1)) + expect_equal(names(out), "g") + expect_equal(out$g, c(1, 1, 1)) +}) + +test_that("empty results preserved (#597)", { + blankdf <- function(x) data.table(blank = numeric(0)) + + dat <- data.table(a = 1:2, b = factor(1:2)) + expect_equal( + dat %>% group_by(b) %>% do(blankdf(.)), + data.table(b = factor(integer(), levels = 1:2), blank = numeric()) + ) +}) + +test_that("empty inputs give empty outputs (#597)", { + expect_warning( + out <- data.table(a = numeric(), b = factor()) %>% + group_by(b) %>% + do(data.table()) + ) + expect_equal(out, data.table(b = factor()) %>% group_by(b)) + + expect_warning( + out <- data.table(a = numeric(), b = character()) %>% + group_by(b) %>% + do(data.table()) + ) + expect_equal(out, data.table(b = character()) %>% group_by(b)) +}) + +test_that("grouped do evaluates args in correct environment", { + a <- 10 + f <- function(a) { + mtcars %>% group_by(cyl) %>% do(a = a) + } + expect_equal(f(100)$a, list(100, 100, 100)) +}) + +# Ungrouped data frames -------------------------------------------------------- + +test_that("ungrouped data frame with unnamed argument returns data frame", { + out <- mtcars %>% as.data.table %>% do(head(.)) + expect_is(out, "data.table") + expect_equal(dim(out), c(6, 11)) +}) + +test_that("ungrouped data frame with named argument returns list data frame", { + out <- mtcars %>% do(x = 1, y = 2:10) + expect_is(out, "tbl_df") + expect_equal(out$x, list(1)) + expect_equal(out$y, list(2:10)) +}) + +test_that("ungrouped do evaluates args in correct environment", { + a <- 10 + f <- function(a) { + mtcars %>% do(a = a) + } + expect_equal(f(100)$a, list(100)) +}) + +# Zero row inputs -------------------------------------------------------------- + +test_that("empty data frames give consistent outputs", { + dat <- tibble(x = numeric(0), g = character(0)) + grp <- dat %>% group_by(g) + emt <- grp %>% filter(FALSE) + + dat %>% do(data.table()) %>% vapply(type_sum, character(1)) %>% + length %>% expect_equal(0) + dat %>% do(data.table(y = integer(0))) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(y = "int")) + dat %>% do(data.table(.)) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(x = "dbl", g = "chr")) + dat %>% do(data.table(., y = integer(0))) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(x = "dbl", g = "chr", y = "int")) + dat %>% do(y = ncol(.)) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(y = "list")) + + # Grouped data frame should have same col types as ungrouped, with addition + # of grouping variable + grp %>% do(data.table()) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(g = "chr")) + grp %>% do(data.table(y = integer(0))) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(g = "chr", y = "int")) + grp %>% do(data.table(.)) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(x = "dbl", g = "chr")) + grp %>% do(data.table(., y = integer(0))) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(x = "dbl", g = "chr", y = "int")) + grp %>% do(y = ncol(.)) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(g = "chr", y = "list")) + + # A empty grouped dataset should have same types as grp + emt %>% do(data.table()) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(g = "chr")) + emt %>% do(data.table(y = integer(0))) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(g = "chr", y = "int")) + emt %>% do(data.table(.)) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(x = "dbl", g = "chr")) + emt %>% do(data.table(., y = integer(0))) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(x = "dbl", g = "chr", y = "int")) + emt %>% do(y = ncol(.)) %>% vapply(type_sum, character(1)) %>% + expect_equal(c(g = "chr", y = "list")) +}) + +test_that("handling of empty data frames in do", { + blankdf <- function(x) data.table(blank = numeric(0)) + dat <- data.table(a = 1:2, b = factor(1:2)) + res <- dat %>% group_by(b) %>% do(blankdf(.)) + expect_equal(names(res), c("b", "blank")) +}) diff --git a/tests/testthat/test-filter.R b/tests/testthat/test-filter.R index a16ea37a4..ecd15e2de 100644 --- a/tests/testthat/test-filter.R +++ b/tests/testthat/test-filter.R @@ -8,8 +8,351 @@ test_that("filter succeeds even if column called V1 (#615)", { }) test_that("filter_ works (#906)", { + skip_if_dtplyr() dt <- data.table::data.table(x = 1:10 ,V1 = 0) out <- dt %>% filter_(~x > 5) expect_equal(nrow(out), 5) }) + +context("filter (copied from dplyr") + +test_that("filter fails if inputs incorrect length (#156)", { + skip_if_dtplyr() + + expect_error( + filter(tbl_dt(mtcars), c(F, T)), + "Result must have length 32, not 2", + fixed = TRUE + ) + expect_error( + filter(group_by(mtcars, am), c(F, T)), + "Result must have length 19, not 2", + fixed = TRUE + ) +}) + +test_that("filter gives useful error message when given incorrect input", { + # error message by rlang + expect_error(filter(tbl_dt(mtcars), `_x`), + "_x", + fixed = TRUE + ) +}) + +test_that("filter complains in inputs are named", { + expect_error( + filter(mtcars, x = 1), + "`x` (`x = 1`) must not be named, do you need `==`?", + fixed = TRUE + ) + expect_error( + filter(mtcars, x = 1 & y > 2), + "`x` (`x = 1 & y > 2`) must not be named, do you need `==`?", + fixed = TRUE + ) +}) + + +test_that("filter handles passing ...", { + skip_if_dtplyr() + + df <- data.table(x = 1:4) + + f <- function(...) { + x1 <- 4 + f1 <- function(y) y + filter(df, ..., f1(x1) > x) + } + g <- function(...) { + x2 <- 2 + f(x > x2, ...) + } + res <- g() + expect_equal(res$x, 3L) + + df <- group_by(df, x) + res <- g() + expect_equal(res$x, 3L) +}) + +test_that("filter handles simple symbols", { + df <- data.table(x = 1:4, test = rep(c(T, F), each = 2)) + res <- filter(df, test) + + gdf <- group_by(df, x) + res <- filter(gdf, test) + + h <- function(data) { + test2 <- c(T, T, F, F) + filter(data, test2) + } + expect_equal(h(df), df[1:2, ]) + + skip_if_dtplyr() + + f <- function(data, ...) { + one <- 1 + filter(data, test, x > one, ...) + } + g <- function(data, ...) { + four <- 4 + f(data, x < four, ...) + } + res <- g(df) + expect_equal(res$x, 2L) + expect_equal(res$test, TRUE) + + res <- g(gdf) + expect_equal(res$x, 2L) + expect_equal(res$test, TRUE) + +}) + +test_that("filter handlers scalar results", { + expect_equivalent(filter(mtcars, min(mpg) > 0), mtcars) + expect_equal(filter(group_by(mtcars, cyl), min(mpg) > 0), group_by(mtcars, cyl)) +}) + +test_that("filter propagates attributes", { + date.start <- ISOdate(2010, 01, 01, 0) + test <- data.table(Date = ISOdate(2010, 01, 01, 1:10)) + test2 <- test %>% filter(Date < ISOdate(2010, 01, 01, 5)) + expect_equal(test$Date[1:4], test2$Date) +}) + +test_that("filter fails on integer indices", { + expect_error( + filter(mtcars, 1:2), + "Argument 2 filter condition does not evaluate to a logical vector", + fixed = TRUE + ) + expect_error( + filter(group_by(mtcars, cyl), 1:2), + "Argument 2 filter condition does not evaluate to a logical vector", + fixed = TRUE + ) +}) + +test_that("filter discards NA", { + temp <- data.table( + i = 1:5, + x = c(NA, 1L, 1L, 0L, 0L) + ) + res <- filter(temp, x == 1) + expect_equal(nrow(res), 2L) +}) + +test_that("date class remains on filter (#273)", { + x1 <- x2 <- data.table( + date = seq.Date(as.Date("2013-01-01"), by = "1 days", length.out = 2), + var = c(5, 8) + ) + x1.filter <- x1 %>% filter(as.Date(date) > as.Date("2013-01-01")) + x2$date <- x2$date + 1 + x2.filter <- x2 %>% filter(as.Date(date) > as.Date("2013-01-01")) + + expect_equal(class(x1.filter$date), "Date") + expect_equal(class(x2.filter$date), "Date") +}) + +test_that("filter handles $ correctly (#278)", { + d1 <- tbl_dt(data.table( + num1 = as.character(sample(1:10, 1000, T)), + var1 = runif(1000), + stringsAsFactors = FALSE)) + d2 <- data.table(num1 = as.character(1:3), stringsAsFactors = FALSE) + + res1 <- d1 %>% filter(num1 %in% c("1", "2", "3")) + res2 <- d1 %>% filter(num1 %in% d2$num1) + expect_equal(res1, res2) +}) + +test_that("filter returns the input data if no parameters are given", { + expect_equivalent(filter(mtcars), mtcars) +}) + +test_that("$ does not end call traversing. #502", { + # Suppose some analysis options are set much earlier in the script + analysis_opts <- list(min_outcome = 0.25) + + # Generate some dummy data + d <- expand.grid(Subject = 1:3, TrialNo = 1:2, Time = 1:3) %>% tbl_dt %>% + arrange(Subject, TrialNo, Time) %>% + mutate(Outcome = (1:18 %% c(5, 7, 11)) / 10) + + # Do some aggregation + trial_outcomes <- d %>% group_by(Subject, TrialNo) %>% + summarise(MeanOutcome = mean(Outcome)) + + left <- filter(trial_outcomes, MeanOutcome < analysis_opts$min_outcome) + right <- filter(trial_outcomes, analysis_opts$min_outcome > MeanOutcome) + + expect_equal(left, right) + +}) + +test_that("filter uses the white list (#566)", { + datesDF <- read.csv(stringsAsFactors = FALSE, text = " +X +2014-03-13 16:08:19 +2014-03-13 16:16:23 +2014-03-13 16:28:28 +2014-03-13 16:28:54 +") + + datesDF$X <- as.POSIXlt(datesDF$X) + # error message from tibble + expect_error(filter(datesDF, X > as.POSIXlt("2014-03-13"))) +}) + +test_that("filter handles complex vectors (#436)", { + d <- data.table(x = 1:10, y = 1:10 + 2i) + expect_equal(filter(d, x < 4)$y, 1:3 + 2i) + expect_equal(filter(d, Re(y) < 4)$y, 1:3 + 2i) +}) + +test_that("%in% works as expected (#126)", { + df <- tibble(a = c("a", "b", "ab"), g = c(1, 1, 2)) + + res <- df %>% filter(a %in% letters) + expect_equal(nrow(res), 2L) + + res <- df %>% group_by(g) %>% filter(a %in% letters) + expect_equal(nrow(res), 2L) + +}) + +test_that("row_number does not segfault with example from #781", { + z <- data.table(a = c(1, 2, 3)) + b <- "a" + res <- z %>% filter(row_number(b) == 2) + expect_equal(nrow(res), 0L) +}) + +test_that("filter does not alter expression (#971)", { + my_filter <- ~ am == 1 + expect_equal(my_filter[[2]][[2]], as.name("am")) +}) + +test_that("hybrid evaluation handles $ correctly (#1134)", { + df <- tibble(x = 1:10, g = rep(1:5, 2)) + res <- df %>% group_by(g) %>% filter(x > min(df$x)) + expect_equal(nrow(res), 9L) +}) + +test_that("filter correctly handles empty data frames (#782)", { + res <- tibble() %>% filter(F) + expect_equal(nrow(res), 0L) + expect_equal(length(names(res)), 0L) +}) + +test_that("filter(.,TRUE,TRUE) works (#1210)", { + df <- data.table(x = 1:5) + res <- filter(df, TRUE, TRUE) + expect_equal(res, df) +}) + +test_that("filter, slice and arrange preserves attributes (#1064)", { + skip_if_dtplyr() + + df <- structure( + data.table(x = 1:10, g1 = rep(1:2, each = 5), g2 = rep(1:5, 2)), + meta = "this is important" + ) + res <- filter(df, x < 5) %>% attr("meta") + expect_equal(res, "this is important") + + res <- filter(df, x < 5, x > 4) %>% attr("meta") + expect_equal(res, "this is important") + + res <- df %>% slice(1:50) %>% attr("meta") + expect_equal(res, "this is important") + + res <- df %>% arrange(x) %>% attr("meta") + expect_equal(res, "this is important") + + res <- df %>% summarise(n()) %>% attr("meta") + expect_equal(res, "this is important") + + res <- df %>% group_by(g1) %>% summarise(n()) %>% attr("meta") + expect_equal(res, "this is important") + + res <- df %>% group_by(g1, g2) %>% summarise(n()) %>% attr("meta") + expect_equal(res, "this is important") + +}) + +test_that("filter works with rowwise data (#1099)", { + df <- tibble(First = c("string1", "string2"), Second = c("Sentence with string1", "something")) + res <- df %>% rowwise() %>% filter(grepl(First, Second, fixed = TRUE)) + expect_equal(nrow(res), 1L) + expect_equal(df[1, ], res) +}) + +test_that("grouped filter handles indices (#880)", { + res <- iris %>% group_by(Species) %>% filter(Sepal.Length > 5) + res2 <- mutate(res, Petal = Petal.Width * Petal.Length) + expect_equal(nrow(res), nrow(res2)) + expect_equal(attr(res, "indices"), attr(res2, "indices")) +}) + +test_that("filter(FALSE) drops indices", { + skip_if_dtplyr() + out <- mtcars %>% + group_by(cyl) %>% + filter(FALSE) %>% + attr("indices") + expect_identical(out, list()) +}) + +test_that("filter handles S4 objects (#1366)", { + env <- environment() + Numbers <- suppressWarnings(setClass( + "Numbers", slots = c(foo = "numeric"), contains = "integer", where = env + )) + on.exit(removeClass("Numbers", where = env)) + + df <- data.table(x = Numbers(1:10, foo = 10)) + res <- filter(df, x > 3) + expect_true(isS4(res$x)) + expect_is(res$x, "Numbers") + expect_equal(res$x@foo, 10) +}) + +test_that("hybrid lag and default value for string columns work (#1403)", { + res <- mtcars %>% + mutate(xx = LETTERS[gear]) %>% + filter(xx == lag(xx, default = "foo")) + xx <- LETTERS[mtcars$gear] + ok <- xx == lag(xx, default = "foo") + expect_equal(xx[ok], res$xx) + + res <- mtcars %>% + mutate(xx = LETTERS[gear]) %>% + filter(xx == lead(xx, default = "foo")) + xx <- LETTERS[mtcars$gear] + ok <- xx == lead(xx, default = "foo") + expect_equal(xx[ok], res$xx) +}) + +# .data and .env tests now in test-hybrid-traverse.R + +test_that("filter fails gracefully on raw columns (#1803)", { + skip_if_dtplyr() + df <- tibble(a = 1:3, b = as.raw(1:3)) + expect_error( + filter(df, a == 1), + "Column `b` is of unsupported type raw", + fixed = TRUE + ) + expect_error( + filter(df, b == 1), + "Column `b` is of unsupported type raw", + fixed = TRUE + ) +}) + +test_that("`vars` attribute is not added if empty (#2772)", { + expect_identical(data.table(x = 1:2) %>% filter(x == 1), data.table(x = 1L)) +}) diff --git a/tests/testthat/test-group-by.R b/tests/testthat/test-group-by.R index 67610f40c..2b17ebfae 100644 --- a/tests/testthat/test-group-by.R +++ b/tests/testthat/test-group-by.R @@ -32,3 +32,340 @@ test_that("constructors drops groups", { dt <- lahman_dt() %>% tbl("Batting") %>% group_by(playerID) expect_equal(groups(tbl_dt(dt)), NULL) }) + + +context("Group by (tests from dplyr)") + +expect_groups <- function(df, groups, info = NULL) { + if (length(groups) == 0L) { + expect_null(groups(df), info = info) + expect_identical(group_vars(df), character(), info = info) + } else { + expect_identical(groups(df), lapply(enc2native(groups), as.name), info = info) + expect_identical(group_vars(df), groups, info = info) + } +} + +expect_no_groups <- function(df) { + expect_groups(df, NULL) +} + +df <- data.table(x = rep(1:3, each = 10), y = rep(1:6, each = 5)) + +test_that("group_by with add = TRUE adds groups", { + add_groups1 <- function(tbl) group_by(tbl, x, y, add = TRUE) + add_groups2 <- function(tbl) group_by(group_by(tbl, x, add = TRUE), y, add = TRUE) + + expect_groups(add_groups1(df), c("x", "y")) + expect_groups(add_groups2(df), c("x", "y")) +}) + +test_that("joins preserve grouping", { + g <- group_by(df, x) + + expect_groups(inner_join(g, g, by = c("x", "y")), "x") + expect_groups(left_join (g, g, by = c("x", "y")), "x") + # expect_groups(semi_join (g, g, by = c("x", "y")), "x") + expect_groups(anti_join (g, g, by = c("x", "y")), "x") +}) + +test_that("constructors drops groups", { + df <- data.table(x = 1:3) %>% group_by(x) + expect_no_groups(tbl_dt(df)) +}) + +test_that("grouping by constant adds column (#410)", { + grouped <- group_by(mtcars, "cyl") %>% summarise(foo = n()) + expect_equal(names(grouped), c('"cyl"', "foo")) + expect_equal(nrow(grouped), 1L) +}) + +# Test full range of variable types -------------------------------------------- + + +test_that("local group_by preserves variable types", { + df_var <- data.table( + l = c(T, F), + i = 1:2, + d = Sys.Date() + 1:2, + f = factor(letters[1:2]), + num = 1:2 + 0.5, + t = Sys.time() + 1:2, + c = letters[1:2] + ) + + for (var in names(df_var)) { + expected <- data.table(unique(df_var[[var]]), n = 1L) + names(expected)[1] <- var + + summarised <- df_var %>% group_by(!! rlang::sym(var)) %>% summarise(n = n()) + expect_equal(summarised, expected, info = var) + } +}) + +test_that("mutate does not loose variables (#144)", { + df <- tbl_dt(data.table(a = rep(1:4, 2), b = rep(1:4, each = 2), x = runif(8))) + by_ab <- group_by(df, a, b) + by_a <- summarise(by_ab, x = sum(x)) + by_a_quartile <- group_by(by_a, quartile = ntile(x, 4)) + + expect_equal(names(by_a_quartile), c("a", "b", "x", "quartile")) +}) + +test_that("group_by uses shallow copy", { + skip_if_dtplyr() + + m1 <- group_by(mtcars, cyl) + expect_no_groups(mtcars) + + expect_equal(dfloc(mtcars), dfloc(m1)) +}) + +test_that("FactorVisitor handles NA. #183", { + skip_if_dtplyr() + + # g <- group_by(as.data.table(MASS::survey), M.I) + # expect_equal(g$M.I, MASS::survey$M.I) +}) + +test_that("group_by orders by groups. #242", { + skip_if_dtplyr() + + df <- data.table(a = sample(1:10, 3000, replace = TRUE)) %>% group_by(a) + expect_equal(attr(df, "labels")$a, 1:10) + + df <- data.table(a = sample(letters[1:10], 3000, replace = TRUE), stringsAsFactors = FALSE) %>% group_by(a) + expect_equal(attr(df, "labels")$a, letters[1:10]) + + df <- data.table(a = sample(sqrt(1:10), 3000, replace = TRUE)) %>% group_by(a) + expect_equal(attr(df, "labels")$a, sqrt(1:10)) +}) + +test_that("group_by uses the white list", { + skip_if_dtplyr() + + df <- data.table(times = 1:5) + df$times <- as.POSIXlt(seq.Date(Sys.Date(), length.out = 5, by = "day")) + expect_error( + group_by(df, times), + "Column `times` is of unsupported class POSIXlt/POSIXt", + fixed = TRUE + ) +}) + +test_that("group_by fails when lists are used as grouping variables (#276)", { + skip_if_dtplyr() + + df <- data.table(x = 1:3) + df$y <- list(1:2, 1:3, 1:4) + expect_error( + group_by(df, y), + "Column `y` can't be used as a grouping variable because it's a list", + fixed = TRUE + ) +}) + +test_that("select(group_by(.)) implicitely adds grouping variables (#170)", { + res <- as.data.table(mtcars) %>% group_by(vs) %>% select(mpg) + expect_equal(names(res), c("vs", "mpg")) +}) + +test_that("grouped_df errors on empty vars (#398)", { + skip_if_dtplyr() + + m <- as.data.table(mtcars) %>% group_by(cyl) + attr(m, "vars") <- NULL + attr(m, "indices") <- NULL + expect_error( + m %>% do(mpg = mean(.$mpg)), + "no variables to group by", + fixed = TRUE + ) +}) + +test_that("grouped_df errors on non-existent var (#2330)", { + df <- data.table(x = 1:5) + expect_error( + grouped_df(df, list(quote(y))), + "Column `y` is unknown" + ) +}) + +test_that("group_by only creates one group for NA (#401)", { + x <- as.numeric(c(NA, NA, NA, 10:1, 10:1)) + w <- c(20, 30, 40, 1:10, 1:10) * 10 + + n_distinct(x) # 11 OK + res <- data.table(x = x, w = w) %>% group_by(x) %>% summarise(n = n()) + expect_equal(nrow(res), 11L) +}) + +test_that("there can be 0 groups (#486)", { + data <- data.table(a = numeric(0), g = character(0)) %>% group_by(g) + expect_equal(length(data$a), 0L) + expect_equal(length(data$g), 0L) + # expect_equal(attr(data, "group_sizes"), integer(0)) +}) + +test_that("group_by works with zero-row data frames (#486)", { + skip_if_dtplyr() + dfg <- group_by(data.table(a = numeric(0), b = numeric(0), g = character(0)), g) + expect_equal(dim(dfg), c(0, 3)) + expect_groups(dfg, "g") + expect_equal(group_size(dfg), integer(0)) + + x <- summarise(dfg, n = n()) + expect_equal(dim(x), c(0, 2)) + expect_no_groups(x) + + x <- mutate(dfg, c = b + 1) + expect_equal(dim(x), c(0, 4)) + expect_groups(x, "g") + expect_equal(group_size(x), integer(0)) + + x <- filter(dfg, a == 100) + expect_equal(dim(x), c(0, 3)) + expect_groups(x, "g") + expect_equal(group_size(x), integer(0)) + + x <- arrange(dfg, a, g) + expect_equal(dim(x), c(0, 3)) + # expect_groups(x, "g") + # expect_equal(group_size(x), integer(0)) + + x <- select(dfg, a) # Only select 'a' column; should result in 'g' and 'a' + expect_equal(dim(x), c(0, 2)) + expect_groups(x, "g") + expect_equal(group_size(x), integer(0)) +}) + +test_that("grouped_df requires a list of symbols (#665)", { + features <- list("feat1", "feat2", "feat3") + # error message by assertthat + expect_error(grouped_df(data.table(feat1 = 1, feat2 = 2, feat3 = 3), features)) +}) + +test_that("group_by gives meaningful message with unknow column (#716)", { + expect_error( + group_by(iris, wrong_name_of_variable), + "Column `wrong_name_of_variable` is unknown", + fixed = TRUE + ) +}) + +test_that("[ on grouped_df preserves grouping if subset includes grouping vars", { + skip_if_dtplyr() + + df <- data.table(x = 1:5, ` ` = 6:10) + by_x <- df %>% group_by(x) + expect_equal(by_x %>% groups(), by_x %>% `[`(1:2) %>% groups) + + # non-syntactic name + by_ns <- df %>% group_by(` `) + expect_equal(by_ns %>% groups(), by_ns %>% `[`(1:2) %>% groups) +}) + + +test_that("[ on grouped_df drops grouping if subset doesn't include grouping vars", { + skip_if_dtplyr() + by_cyl <- as.data.table(mtcars) %>% group_by(cyl) + no_cyl <- by_cyl %>% `[`(c(1, 3)) + + expect_no_groups(no_cyl) + expect_is(no_cyl, "tbl_dt") +}) + +test_that("group_by works after arrange (#959)", { + skip_if_dtplyr() + df <- data.table(Log = c(1, 2, 1, 2, 1, 2), Time = c(10, 1, 3, 0, 15, 11)) + res <- df %>% + arrange(Time) %>% + group_by(Log) %>% + mutate(Diff = Time - lag(Time)) + expect_true(all(is.na(res$Diff[c(1, 3)]))) + expect_equal(res$Diff[c(2, 4, 5, 6)], c(1, 7, 10, 5)) +}) + +test_that("group_by keeps attributes", { + d <- data.table(x = structure(1:10, foo = "bar")) + gd <- group_by(d) + expect_equal(attr(gd$x, "foo"), "bar") +}) + +test_that("ungroup.rowwise_df gives a tbl_dt (#936)", { + skip_if_dtplyr() + + res <- tbl_dt(mtcars) %>% rowwise %>% ungroup %>% class + expect_equal(res, c("tbl_dt", "tbl", "data.table")) +}) + +test_that(paste0("group_by handles encodings for native strings (#1507)"), { + skip_if_dtplyr() + + with_non_utf8_encoding({ + special <- get_native_lang_string() + + df <- data.table(x = 1:3, Eng = 2:4) + + for (names_converter in c(enc2native, enc2utf8)) { + for (dots_converter in c(enc2native, enc2utf8)) { + names(df) <- names_converter(c(special, "Eng")) + res <- group_by(df, !!! syms(dots_converter(special))) + expect_equal(names(res), names(df)) + expect_groups(res, special) + } + } + + for (names_converter in c(enc2native, enc2utf8)) { + names(df) <- names_converter(c(special, "Eng")) + + res <- group_by(df, !!! special) + expect_equal(names(res), c(names(df), deparse(special))) + expect_equal(groups(res), list(as.name(enc2native(deparse(special))))) + } + }) +}) + +test_that("group_by fails gracefully on raw columns (#1803)", { + skip_if_dtplyr() + + df <- data.table(a = 1:3, b = as.raw(1:3)) + expect_error( + group_by(df, a), + "Column `b` is of unsupported type raw", + fixed = TRUE + ) + expect_error( + group_by(df, b), + "Column `b` is of unsupported type raw", + fixed = TRUE + ) +}) + +test_that("rowwise fails gracefully on raw columns (#1803)", { + skip_if_dtplyr() + df <- data.table(a = 1:3, b = as.raw(1:3)) + expect_error( + rowwise(df), + "Column `b` is of unsupported type raw", + fixed = TRUE + ) +}) + +test_that("group_by() names pronouns correctly (#2686)", { + expect_named(group_by(tibble(x = 1), .data$x), "x") + expect_named(group_by(tibble(x = 1), .data[["x"]]), "x") +}) + +test_that("group_by() does not affect input data (#3028)", { + x <- + data.table(old1 = c(1, 2, 3), old2 = c(4, 5, 6)) %>% + group_by(old1) + + y <- + x %>% + select(new1 = old1, new2 = old2) + + expect_identical(groups(x), rlang::syms(quote(old1))) +}) + diff --git a/tests/testthat/test-joins.R b/tests/testthat/test-joins.R index f0fe99a67..af69f843b 100644 --- a/tests/testthat/test-joins.R +++ b/tests/testthat/test-joins.R @@ -34,8 +34,8 @@ test_that("joining data tables returns same result as dplyr", { a_dt <- data.table(x = c(1, 1, 2, 3), y = 4:1) b_dt <- data.table(x = c(1, 2, 2, 4), z = 1:4) - a_df <- as_data_frame(a_dt) - b_df <- as_data_frame(b_dt) + a_df <- as_tibble(a_dt) + b_df <- as_tibble(b_dt) test_join <- function(join_fun) { out <- join_fun(a_dt, b_dt, "x") @@ -56,15 +56,15 @@ test_that("changing suffixes works as in dplyr", { a_dt <- data.table(x = c(1, 1, 2, 3), y = 4:1) b_dt <- data.table(x = c(1, 2, 2, 4), z = 1:4) - a_df <- as_data_frame(a_dt) - b_df <- as_data_frame(b_dt) + a_df <- as_tibble(a_dt) + b_df <- as_tibble(b_dt) test_join <- function(join_fun) { out <- join_fun(a_dt, b_dt, "x", suffix = c("left", "right")) out_dplyr <- tbl_dt(join_fun(a_df, b_df, "x", suffix = c("left", "right"))) expect_equal(out, out_dplyr) } - + test_join(left_join) test_join(right_join) test_join(full_join) diff --git a/tests/testthat/test-mutate.R b/tests/testthat/test-mutate.R index 81f048f9e..8b43b4032 100644 --- a/tests/testthat/test-mutate.R +++ b/tests/testthat/test-mutate.R @@ -4,3 +4,830 @@ test_that("mutate modifies same column repeatedly (#243)", { dt <- data.table::data.table(x = 1) expect_equal(mutate(dt, x = x + 1, x = x + 1)$x, 3) }) + + +context("mutate (tests from dplyr)") + +test_that("repeated outputs applied progressively (data frame)", { + df <- data.table(x = 1) + out <- mutate(df, z = x + 1, z = z + 1) + + expect_equal(nrow(out), 1) + expect_equal(ncol(out), 2) + + expect_equal(out$z, 3) +}) + +test_that("repeated outputs applied progressively (grouped_df)", { + df <- data.table(x = c(1, 1), y = 1:2) + ds <- group_by(df, y) + out <- mutate(ds, z = x + 1, z = z + 1) + + expect_equal(nrow(out), 2) + expect_equal(ncol(out), 3) + + expect_equal(out$z, c(3L, 3L)) +}) + +test_that("two mutates equivalent to one", { + df <- data.table(x = 1:10, y = 6:15) + + df1 <- df %>% mutate(x2 = x * 2, y4 = y * 4) + df2 <- df %>% mutate(x2 = x * 2) %>% mutate(y4 = y * 4) + + expect_equal(df1, df2) +}) + +test_that("mutate can refer to variables that were just created (#140)", { + res <- mutate(tbl_dt(mtcars), cyl1 = cyl + 1, cyl2 = cyl1 + 1) + expect_equal(res$cyl2, mtcars$cyl + 2) + + gmtcars <- group_by(tbl_dt(mtcars), am) + res <- mutate(gmtcars, cyl1 = cyl + 1, cyl2 = cyl1 + 1) + res_direct <- mutate(gmtcars, cyl2 = cyl + 2) + expect_equal(res$cyl2, res_direct$cyl2) +}) + +test_that("mutate handles logical result (#141)", { + x <- data.table(x = 1:10, g = rep(c(1, 2), each = 5)) + res <- tbl_dt(x) %>% group_by(g) %>% mutate(r = x > mean(x)) + expect_equal(res$r, rep(c(FALSE, FALSE, FALSE, TRUE, TRUE), 2)) +}) + +test_that("mutate can rename variables (#137)", { + res <- mutate(tbl_dt(mtcars), cyl2 = cyl) + expect_equal(res$cyl2, mtcars$cyl) + + res <- mutate(group_by(tbl_dt(mtcars), am), cyl2 = cyl) + expect_equal(res$cyl2, res$cyl) +}) + +test_that("mutate refuses to modify grouping vars (#143)", { + skip_if_dtplyr() + + expect_error( + mutate(group_by(tbl_dt(mtcars), am), am = am + 2), + "Column `am` can't be modified because it's a grouping variable", + fixed = TRUE + ) +}) + +test_that("mutate handles constants (#152)", { + res <- mutate(tbl_dt(mtcars), zz = 1) + expect_equal(res$zz, rep(1, nrow(mtcars))) +}) + +test_that("mutate fails with wrong result size (#152)", { + skip_if_dtplyr() + + df <- group_by(data.table(x = c(2, 2, 3, 3)), x) + expect_equal(mutate(df, y = 1:2)$y, rep(1:2, 2)) + expect_error( + mutate(data.table(mtcars), zz = 1:2), + "Column `zz` must be length 32 (the number of rows) or one, not 2", + fixed = TRUE + ) + + df <- group_by(data.table(x = c(2, 2, 3, 3, 3)), x) + expect_error( + mutate(df, y = 1:2), + "Column `y` must be length 3 (the group size) or one, not 2", + fixed = TRUE + ) +}) + +test_that("mutate refuses to use symbols not from the data", { + skip_if_dtplyr() + + y <- 1:6 + df <- group_by(data.table(x = c(1, 2, 2, 3, 3, 3)), x) + expect_error( + mutate(df, z = y), + "Column `z` must be length 1 (the group size), not 6", + fixed = TRUE + ) +}) + +test_that("mutate recycles results of length 1", { + skip_if_dtplyr() + + df <- data.table(x = c(2, 2, 3, 3)) + expect_equal(mutate(tbl_dt(df), z = length(x))$z, rep(4, 4)) + expect_equal(mutate(group_by(df, x), z = length(x))$z, rep(2, 4)) + + int <- 1L + str <- "foo" + num <- 1 + bool <- TRUE + list <- list(NULL) + + res <- mutate(group_by(df, x), int = int, str = str, num = num, bool = bool, list = list) + expect_equal(res$int , rep(int , 4)) + expect_equal(res$str , rep(str , 4)) + expect_equal(res$num , rep(num , 4)) + expect_equal(res$bool, rep(bool, 4)) + expect_equal(res$list, rep(list, 4)) +}) + + +test_that("mutate handles out of data variables", { + skip_if_dtplyr() + + today <- Sys.Date() + now <- Sys.time() + df <- data.table(x = c(2, 2, 3, 3)) + gdf <- group_by(df, x) + + int <- c(1L, 2L) + str <- c("foo", "bar") + num <- c(1, 2) + bool <- c(TRUE, FALSE) + dat <- rep(today, 2) + tim <- rep(now, 2) + + res <- mutate( + gdf, + int = int, str = str, num = num, bool = bool, dat = dat, tim = tim + ) + expect_equal(res$int , rep(int , 2)) + expect_equal(res$str , rep(str , 2)) + expect_equal(res$num , rep(num , 2)) + expect_equal(res$bool, rep(bool, 2)) + expect_equal(res$dat , rep(dat , 2)) + expect_equal(res$tim , rep(tim , 2)) + + int <- 1:6 + expect_error( + mutate(gdf, int = int), + "Column `int` must be length 2 (the group size) or one, not 6", + fixed = TRUE + ) + expect_error( + mutate(tbl_dt(df), int = int), + "Column `int` must be length 4 (the number of rows) or one, not 6", + fixed = TRUE + ) + + int <- 1:4 + str <- rep(c("foo", "bar"), 2) + num <- c(1, 2, 3, 4) + bool <- c(TRUE, FALSE, FALSE, TRUE) + dat <- rep(today, 4) + tim <- rep(now, 4) + + res <- mutate( + tbl_dt(df), + int = int, str = str, num = num, bool = bool, tim = tim, dat = dat + ) + expect_equal(res$int , int) + expect_equal(res$str , str) + expect_equal(res$num , num) + expect_equal(res$bool, bool) + expect_equal(res$dat , dat) + expect_equal(res$tim , tim) +}) + +test_that("mutate handles passing ...", { + skip_if_dtplyr() + + df <- data.table(x = 1:4) + + f <- function(...) { + x1 <- 1 + f1 <- function(x) x + mutate(df, ..., x1 = f1(x1)) + } + g <- function(...) { + x2 <- 2 + f(x2 = x2, ...) + } + h <- function(before = "before", ..., after = "after") { + g(before = before, ..., after = after) + } + + res <- h(x3 = 3) + expect_equal(res$x1, rep(1, 4)) + expect_equal(res$x2, rep(2, 4)) + expect_equal(res$before, rep("before", 4)) + expect_equal(res$after, rep("after", 4)) + + df <- tbl_dt(df) + res <- h(x3 = 3) + expect_equal(res$x1, rep(1, 4)) + expect_equal(res$x2, rep(2, 4)) + expect_equal(res$before, rep("before", 4)) + expect_equal(res$after, rep("after", 4)) + + df <- group_by(df, x) + res <- h(x3 = 3) + expect_equal(res$x1, rep(1, 4)) + expect_equal(res$x2, rep(2, 4)) + expect_equal(res$before, rep("before", 4)) + expect_equal(res$after, rep("after", 4)) + +}) + +test_that("mutate fails on unsupported column type", { + skip_if_dtplyr() + + df <- data.table(created = c("2014/1/1", "2014/1/2", "2014/1/2")) + expect_error( + mutate(df, date = strptime(created, "%Y/%m/%d")), + "Column `date` is of unsupported class POSIXlt", + fixed = TRUE + ) + + df <- data.table( + created = c("2014/1/1", "2014/1/2", "2014/1/2"), + g = c(1, 1, 2) + ) + expect_error( + mutate(group_by(df, g), date = strptime(created, "%Y/%m/%d")), + "Column `date` is of unsupported class POSIXlt", + fixed = TRUE + ) +}) + +test_that("mutate modifies same column repeatedly (#243)", { + df <- data.table(x = 1) + expect_equal(mutate(df, x = x + 1, x = x + 1)$x, 3) +}) + +test_that("mutate errors when results are not compatible accross groups (#299)", { + skip_if_dtplyr() + + d <- data.table(x = rep(1:5, each = 3)) + expect_error( + mutate(group_by(d, x), val = ifelse(x < 3, "foo", 2)), + "Column `val` can't be converted from character to numeric", + fixed = TRUE + ) +}) + +test_that("assignments don't overwrite variables (#315)", { + expect_equal( + mutate(mtcars, cyl2 = { mpg <- cyl ^ 2; -mpg }), + mutate(mtcars, cyl2 = -cyl ^ 2) + ) +}) + +test_that("hybrid evaluator uses correct environment (#403)", { + func1 <- function() { + func2 <- function(x) floor(x) + mutate(mtcars, xx = func2(mpg / sum(mpg))) + } + res <- func1() + expect_equal(res$xx, rep(0, nrow(res))) +}) + +test_that("mutate remove variables with = NULL syntax (#462)", { + data <- mtcars %>% mutate(cyl = NULL) + expect_false("cyl" %in% names(data)) + + data <- mtcars %>% group_by(disp) %>% mutate(cyl = NULL) + expect_false("cyl" %in% names(data)) +}) + +test_that("mutate strips names, but only if grouped (#1689, #2675)", { + skip_if_dtplyr() + + data <- data.table(a = 1:3) %>% mutate(b = setNames(nm = a)) + expect_equal(names(data$b), as.character(1:3)) + + data <- data.table(a = 1:3) %>% rowwise %>% mutate(b = setNames(nm = a)) + expect_null(names(data$b)) + + data <- data.table(a = c(1, 1, 2)) %>% group_by(a) %>% mutate(b = setNames(nm = a)) + expect_null(names(data$b)) +}) + +test_that("mutate does not strip names of list-columns (#2675)", { + skip_if_dtplyr() + + vec <- list(a = 1, b = 2) + data <- data.table(x = vec) + data <- mutate(data, x) + expect_identical(names(vec), c("a", "b")) + expect_identical(names(data$x), c("a", "b")) +}) + +test_that("mutate gives a nice error message if an expression evaluates to NULL (#2187)", { + skip_if_dtplyr() + + df <- data.table(a = 1:3) + gf <- group_by(df, a) + rf <- rowwise(df) + + expect_error( + mutate(df, b = identity(NULL)), + "Column `b` is of unsupported type NULL", + fixed = TRUE + ) + expect_error( + mutate(gf, b = identity(NULL)), + "Column `b` is of unsupported type NULL", + fixed = TRUE + ) + expect_error( + mutate(rf, b = identity(NULL)), + "Column `b` is of unsupported type NULL", + fixed = TRUE + ) +}) + +test_that("mutate(rowwise_df) makes a rowwise_df (#463)", { + skip_if_dtplyr() + one_mod <- data.table(grp = "a", x = runif(5, 0, 1)) %>% + tbl_dt %>% + mutate(y = rnorm(x, x * 2, 1)) %>% + group_by(grp) %>% + do(mod = lm(y~x, data = .)) + + out <- one_mod %>% + mutate(rsq = summary(mod)$r.squared) %>% + mutate(aic = AIC(mod)) + + expect_is(out, "rowwise_df") + expect_equal(nrow(out), 1L) + expect_is(out$mod, "list") + expect_is(out$mod[[1L]], "lm") +}) + +test_that("mutate allows list columns (#555)", { + df <- data.table(x = c("a;b", "c;d;e"), stringsAsFactors = FALSE) + res <- mutate(df, pieces = strsplit(x, ";")) + expect_equal(res$pieces, list(c("a", "b"), c("c", "d", "e"))) +}) + +test_that("hybrid evaluation goes deep enough (#554)", { + res1 <- iris %>% mutate(test = 1 == 2 | row_number() < 10) + res2 <- iris %>% mutate(test = row_number() < 10 | 1 == 2) + expect_equal(res1, res2) +}) + +test_that("hybrid does not segfault when given non existing variable (#569)", { + # error message from rlang + expect_error(mtcars %>% summarise(first(mp))) +}) + +test_that("namespace extraction works in hybrid (#412)", { + df <- data.table(x = 1:2) + + expect_equal( + mutate(df, y = base::mean(x)), + mutate(df, y = mean(x)) + ) + expect_equal( + mutate(df, y = stats::IQR(x)), + mutate(df, y = IQR(x)) + ) +}) + +test_that("hybrid not get in the way of order_by (#169)", { + df <- data.table(x = 10:1, y = 1:10) + res <- mutate(df, z = order_by(x, cumsum(y))) + expect_equal(res$z, rev(cumsum(10:1))) +}) + +test_that("mutate supports difftime objects (#390)", { + skip_if_dtplyr() + + df <- data.table( + grp = c(1, 1, 2, 2), + val = c(1, 3, 4, 6), + date1 = c(rep(Sys.Date() - 10, 2), rep(Sys.Date() - 20, 2)), + date2 = Sys.Date() + c(1, 2, 1, 2), + diffdate = difftime(date2, date1, unit = "days") + ) + + res <- df %>% + group_by(grp) %>% + mutate(mean_val = mean(val), mean_diffdate = mean(diffdate)) + expect_is(res$mean_diffdate, "difftime") + expect_equal(as.numeric(res$mean_diffdate), c(11.5, 11.5, 21.5, 21.5)) + + res <- df %>% group_by(grp) %>% summarise(dt = mean(diffdate)) + expect_is(res$dt, "difftime") + expect_equal(as.numeric(res$dt), c(11.5, 21.5)) +}) + +test_that("mutate works on zero-row grouped data frame (#596)", { + skip_if_dtplyr() + + dat <- data.table(a = numeric(0), b = character(0)) + res <- dat %>% group_by(b) %>% mutate(a2 = a * 2) + expect_is(res$a2, "numeric") + expect_is(res, "grouped_dt") + expect_equal(res$a2, numeric(0)) + expect_equal(attr(res, "indices"), list()) + expect_equal(attr(res, "vars"), "b") + expect_equal(attr(res, "group_sizes"), integer(0)) + expect_equal(attr(res, "biggest_group_size"), 0L) +}) + +test_that("Non-ascii column names in version 0.3 are not duplicated (#636)", { + skip_if_dtplyr() + df <- data.table(a = "1", b = "2") + names(df) <- c("a", enc2native("\u4e2d")) + + res <- df %>% mutate_all(as.numeric) %>% names() + expect_equal(res, names(df)) +}) + +test_that("nested hybrid functions do the right thing (#637)", { + res <- mtcars %>% mutate(mean(1)) + expect_true(all(res[["mean(1)"]] == 1L)) +}) + +test_that("mutate handles using and gathering complex data (#436)", { + skip_if_dtplyr() + + d <- data.table(x = 1:10, y = 1:10 + 2i) + res <- mutate(d, real = Re(y), imag = Im(y), z = 2 * y, constant = 2 + 2i) + expect_equal(names(res), c("x", "y", "real", "imag", "z", "constant")) + expect_equal(res$real, Re(d$y)) + expect_equal(res$imag, Im(d$y)) + expect_equal(res$z, d$y * 2) + expect_true(all(res$constant == 2 + 2i)) +}) + +test_that("mutate forbids POSIXlt results (#670)", { + skip_if_dtplyr() + + expect_error( + data.table(time = "2014/01/01 10:10:10") %>% + mutate(time = as.POSIXlt(time)), + "Column `time` is of unsupported class POSIXlt", + fixed = TRUE + ) + + expect_error( + data.table(time = "2014/01/01 10:10:10", a = 2) %>% + group_by(a) %>% + mutate(time = as.POSIXlt(time)), + "Column `time` is of unsupported class POSIXlt", + fixed = TRUE + ) + +}) + +test_that("constant factor can be handled by mutate (#715)", { + d <- data.table(x = 1:2) %>% mutate(y = factor("A")) + expect_true(is.factor(d$y)) + expect_equal(d$y, factor(c("A", "A"))) +}) + +test_that("row_number handles empty data frames (#762)", { + skip_if_dtplyr() + + df <- data.table(a = numeric(0)) + res <- df %>% mutate( + row_number_0 = row_number(), + row_number_a = row_number(a), + ntile = ntile(a, 2), + min_rank = min_rank(a), + percent_rank = percent_rank(a), + dense_rank = dense_rank(a), + cume_dist = cume_dist(a) + ) + expect_equal( + names(res), + c("a", "row_number_0", "row_number_a", "ntile", "min_rank", "percent_rank", "dense_rank", "cume_dist") + ) + expect_equal(nrow(res), 0L) +}) + +test_that("no utf8 invasion (#722)", { + skip_if_dtplyr() + skip_on_os("windows") + + source("utf-8.R", local = TRUE, encoding = "UTF-8") +}) + +test_that("mutate works on empty data frames (#1142)", { + skip_if_dtplyr() + df <- data.table() + res <- df %>% mutate + expect_equal(nrow(res), 0L) + expect_equal(length(res), 0L) + + res <- df %>% mutate(x = numeric()) + expect_equal(names(res), "x") + expect_equal(nrow(res), 0L) + expect_equal(length(res), 1L) +}) + +test_that("mutate handles 0 rows rowwise (#1300)", { + skip_if_dtplyr() + + a <- data.table(x = 1) + b <- data.table(y = character()) + + g <- function(y) { + 1 + } + f <- function() { + b %>% rowwise() %>% mutate(z = g(y)) + } + + res <- f() + expect_equal(nrow(res), 0L) + + expect_error( + a %>% mutate(b = f()), + "Column `b` must be length 1 (the number of rows), not 2", + fixed = TRUE + ) + expect_error( + a %>% rowwise() %>% mutate(b = f()), + "Column `b` must be length 1 (the group size), not 2", + fixed = TRUE + ) +}) + +test_that("regression test for #637", { + res <- mtcars %>% mutate(xx = mean(1)) + expect_true(all(res$xx == 1)) + + res <- mtcars %>% mutate(xx = sum(mean(mpg))) + expect_true(all(res$xx == sum(mean(mtcars$mpg)))) +}) + +test_that("mutate.rowwise handles factors (#886)", { + res <- data.table(processed = c("foo", "bar")) %>% + rowwise() %>% + mutate(processed_trafo = paste("test", processed)) + expect_equal(res$processed_trafo, c("test foo", "test bar")) +}) + +test_that("setting first column to NULL with mutate works (#1329)", { + skip_if_dtplyr() + + df <- data.table(x = 1:10, y = 1:10) + expect_equal(mutate(df, x = NULL), select(df, -x)) + expect_equal(mutate(df, y = NULL), select(df, -y)) + + gdf <- group_by(df, y) + expect_equal(select(gdf, -x), mutate(gdf, x = NULL)) +}) + +test_that("mutate handles the all NA case (#958)", { + x <- rep(c("Bob", "Jane"), each = 36) + y <- rep(rep(c("A", "B", "C"), each = 12), 2) + day <- rep(rep(1:12, 3), 2) + values <- rep(rep(c(10, 11, 30, 12, 13, 14, 15, 16, 17, 18, 19, 20), 3), 2) + + df <- data.table(x = x, y = y, day = day, values = values) + df$values[1:12] <- NA + + res <- df %>% + group_by(x, y) %>% + mutate(max.sum = day[which.max(values)[1]]) %>% + mutate(adjusted_values = if_else(day < max.sum, 30, values)) + expect_true(all(is.na(res$adjusted_values[1:12]))) +}) + +test_that("rowwise mutate gives expected results (#1381)", { + f <- function(x) ifelse(x < 2, NA_real_, x) + res <- data.table(x = 1:3) %>% rowwise() %>% mutate(y = f(x)) + expect_equal(res$y, c(NA, 2, 3)) +}) + +test_that("mutate handles factors (#1414)", { + d <- data.table( + g = c(1, 1, 1, 2, 2, 3, 3), + f = c("a", "b", "a", "a", "a", "b", "b") + ) + res <- d %>% group_by(g) %>% mutate(f2 = factor(f, levels = c("a", "b"))) + expect_equal(as.character(res$f2), res$f) +}) + +test_that("mutate handles results from one group with all NA values (#1463) ", { + skip_if_dtplyr() + + df <- data.table(x = c(1, 2), y = c(1, NA)) + res <- df %>% group_by(x) %>% mutate(z = ifelse(y > 1, 1, 2)) + expect_true(is.na(res$z[2])) + expect_is(res$z, "numeric") +}) + +test_that("rowwise mutate handles the NA special case (#1448)", { + res <- data.table(k = c(-1, 1, 1)) %>% + rowwise() %>% + mutate(l = ifelse(k > 0, 1, NA)) + expect_is(res$l, "numeric") + expect_true(is.na(res$l[1])) + expect_true(!anyNA(res$l[-1])) + + res <- data.table(k = rnorm(10)) %>% + rowwise() %>% + mutate(l = ifelse(k > 0, 1L, NA_integer_)) + expect_true(all(is.na(res$l[res$k <= 0]))) + expect_true(!any(is.na(res$l[res$k > 0]))) +}) + +test_that("mutate disambiguates NA and NaN (#1448)", { + Pass <- data.table(P2 = c(0, 3, 2), F2 = c(0, 2, 0), id = 1:3) + res <- Pass %>% + group_by(id) %>% + mutate(pass2 = P2 / (P2 + F2)) + expect_true(is.nan(res$pass2[1])) + + res <- Pass %>% + rowwise %>% + mutate(pass2 = P2 / (P2 + F2)) + expect_true(is.nan(res$pass2[1])) + + Pass <- data.table( + P1 = c(2L, 0L, 10L, 8L, 9L), + F1 = c(0L, 2L, 0L, 4L, 3L), + P2 = c(0L, 3L, 2L, 2L, 2L), + F2 = c(0L, 2L, 0L, 1L, 1L), + id = c(1, 2, 4, 4, 5) + ) + + res <- Pass %>% + group_by(id) %>% + mutate( + pass_rate = (P1 + P2) / (P1 + P2 + F1 + F2) * 100, + pass_rate1 = P1 / (P1 + F1) * 100, + pass_rate2 = P2 / (P2 + F2) * 100 + ) + expect_true(is.nan(res$pass_rate2[1])) +}) + +test_that("hybrid evaluator leaves formulas untouched (#1447)", { + skip_if_dtplyr() + + d <- data.table(g = 1:2, training = list(mtcars, mtcars * 2)) + mpg <- data.table(x = 1:10, y = 1:10) + res <- d %>% + group_by(g) %>% + mutate(lm_result = list(lm(mpg ~ wt, data = training[[1]]))) + expect_is(res$lm_result, "list") + expect_is(res$lm_result[[1]], "lm") + expect_is(res$lm_result[[2]], "lm") +}) + +test_that("lead/lag inside mutate handles expressions as value for default (#1411) ", { + df <- data.table(x = 1:3) + res <- mutate(df, leadn = lead(x, default = x[1]), lagn = lag(x, default = x[1])) + expect_equal(res$leadn, lead(df$x, default = df$x[1])) + expect_equal(res$lagn, lag(df$x, default = df$x[1])) + + res <- mutate(df, leadn = lead(x, default = c(1)), lagn = lag(x, default = c(1))) + expect_equal(res$leadn, lead(df$x, default = 1)) + expect_equal(res$lagn, lag(df$x, default = 1)) +}) + +test_that("grouped mutate does not drop grouping attributes (#1020)", { + d <- data.table(subject = c("Jack", "Jill"), id = c(2, 1)) %>% group_by(subject) + a1 <- names(attributes(d)) + a2 <- names(attributes(d %>% mutate(foo = 1))) + expect_equal(setdiff(a1, a2), character(0)) +}) + +test_that("grouped mutate coerces integer + double -> double (#1892)", { + skip_if_dtplyr() + + df <- data.table( + id = c(1, 4), + value = c(1L, NA), + group = c("A", "B") + ) %>% + group_by(group) %>% + mutate(value = ifelse(is.na(value), 0, value)) + expect_type(df$value, "double") + expect_identical(df$value, c(1, 0)) +}) + +test_that("grouped mutate coerces factor + character -> character (WARN) (#1892)", { + skip_if_dtplyr() + + factor_or_character <- function(x) { + if (x > 3) { + return(factor("hello")) + } else { + return("world") + } + } + + df <- data.table( + id = c(1, 4), + group = c("A", "B") + ) %>% + group_by(group) + expect_warning( + df <- df %>% + mutate(value = factor_or_character(id)) + ) + expect_type(df$value, "character") + expect_identical(df$value, c("world", "hello")) +}) + +test_that("lead/lag works on more complex expressions (#1588)", { + df <- data.table(x = rep(1:5, 2), g = rep(1:2, each = 5)) %>% group_by(g) + res <- df %>% mutate(y = lead(x > 3)) + expect_equal(res$y, rep(lead(1:5 > 3), 2)) +}) + +test_that("Adding a Column of NA to a Grouped Table gives expected results (#1645)", { + dataset <- data.table(A = 1:10, B = 10:1, group = factor(sample(LETTERS[25:26], 10, TRUE))) + res <- dataset %>% group_by(group) %>% mutate(prediction = factor(NA)) + expect_true(all(is.na(res$prediction))) + expect_is(res$prediction, "factor") + expect_equal(levels(res$prediction), character()) +}) + +test_that("Deep copies are performed when needed (#1463)", { + skip_if_dtplyr() + + res <- data.table(prob = c(F, T)) %>% + rowwise %>% + mutate(model = list(x = prob)) + expect_equal(unlist(res$model), c(FALSE, TRUE)) + + res <- data.table(x = 1:4, g = c(1, 1, 1, 2)) %>% + group_by(g) %>% + mutate(model = list(y = x)) + expect_equal(res$model[[1]], 1:3) + expect_equal(res$model[[4]], 4) +}) + +test_that("ntile falls back to R (#1750)", { + res <- mutate(iris, a = ntile("Sepal.Length", 3)) + expect_equal(res$a, rep(1, 150)) +}) + +test_that("mutate() names pronouns correctly (#2686)", { + skip_if_dtplyr() + + expect_named(mutate(data.table(x = 1), .data$x), "x") + expect_named(mutate(data.table(x = 1), .data[["x"]]), "x") +}) + +test_that("mutate() supports unquoted values", { + skip_if_dtplyr() + + df <- data.table(g = c(1, 1, 2, 2, 2), x = 1:5) + expect_identical(mutate(df, out = !! 1), mutate(df, out = 1)) + expect_identical(mutate(df, out = !! 1:5), mutate(df, out = 1:5)) + expect_identical(mutate(df, out = !! quote(1:5)), mutate(df, out = 1:5)) + expect_error(mutate(df, out = !! 1:2), "must be length 5 (the number of rows)", fixed = TRUE) + expect_error(mutate(df, out = !! get_env()), "unsupported type") + + gdf <- group_by(df, g) + expect_identical(mutate(gdf, out = !! 1), mutate(gdf, out = 1)) + expect_identical(mutate(gdf, out = !! 1:5), group_by(mutate(df, out = 1:5), g)) + expect_error(mutate(gdf, out = !! quote(1:5)), "must be length 2 (the group size)", fixed = TRUE) + expect_error(mutate(gdf, out = !! 1:2), "must be length 5 (the number of rows)", fixed = TRUE) + expect_error(mutate(gdf, out = !! get_env()), "unsupported type") +}) + + +# Error messages ---------------------------------------------------------- + +test_that("mutate fails gracefully on non-vector columns (#1803)", { + skip_if_dtplyr() + + df <- data.table(a = 1:3, b = as.raw(1:3)) + expect_error( + mutate(df, a = 1), + "Column `b` is of unsupported type raw vector", + fixed = TRUE + ) + expect_error( + mutate(df, b = 1), + "Column `b` is of unsupported type raw vector", + fixed = TRUE + ) + expect_error( + mutate(df, c = 1), + "Column `b` is of unsupported type raw vector", + fixed = TRUE + ) +}) + +test_that("grouped mutate errors on incompatible column type (#1641)", { + skip_if_dtplyr() + + expect_error( + data.table(x = 1) %>% mutate(y = mean), + "Column `y` is of unsupported type function", + fixed = TRUE + ) + expect_error( + data.table(x = 1) %>% mutate(y = quote(a)), + "Column `y` is of unsupported type symbol", + fixed = TRUE + ) +}) + +test_that("can reuse new variables", { + expect_equal( + data.table(c = 1) %>% mutate(c, gc = mean(c)), + data.table(c = 1, gc = 1) + ) +}) + + diff --git a/tests/testthat/test-select.R b/tests/testthat/test-select.R index 564d57f2e..5da9fc2df 100644 --- a/tests/testthat/test-select.R +++ b/tests/testthat/test-select.R @@ -1,4 +1,4 @@ -context("select") +context("Select") test_that("adds grouping variables", { res <- mtcars %>% tbl_dt() %>% group_by(vs) %>% select(mpg) @@ -15,3 +15,183 @@ test_that("select changes columns in copy of data table", { expect_equal(names(select(gdt, x, z = y)), c("x", "z")) expect_equal(names(gdt), c("x", "y")) }) + + +context("Select (tests from dplyr, May 2019):") + +expect_groups <- function(df, groups, info = NULL) { + if (length(groups) == 0L) { + expect_null(groups(df), info = info) + expect_identical(group_vars(df), character(), info = info) + } else { + expect_identical(groups(df), lapply(enc2native(groups), as.name), info = info) + expect_identical(group_vars(df), groups, info = info) + } +} + +expect_no_groups <- function(df) { + expect_groups(df, NULL) +} + + +mtcars_dt <- as.data.table(mtcars) + +test_that("select does not lose grouping (#147)", { + df <- data.table(a = rep(1:4, 2), b = rep(1:4, each = 2), x = runif(8)) + grouped <- df %>% group_by(a) %>% select(a, b, x) + + expect_groups(grouped, "a") +}) + +test_that("grouping variables preserved with a message (#1511)", { + df <- data.table(g = 1:3, x = 3:1) %>% group_by(g) + + expect_message(res <- select(df, x), "Adding missing grouping variables") + expect_named(res, c("g", "x")) +}) + +test_that("non-syntactic grouping variable is preserved (#1138)", { + df <- data.table(`a b` = 1L) %>% group_by(`a b`) %>% select() + expect_named(df, "a b") +}) + +test_that("select doesn't fail if some names missing", { + df1 <- data.table(x = 1:10, y = 1:10, z = 1:10) + df2 <- setNames(df1, c("x", "y", "")) + # df3 <- setNames(df1, c("x", "", "")) + + expect_equal(select(df1, x), data.table(x = 1:10)) + expect_equal(select(df2, x), data.table(x = 1:10)) + # expect_equal(select(df3, x), data.table(x = 1:10)) +}) + + +# Empty selects ------------------------------------------------- + +test_that("select with no args returns nothing", { + empty <- select(mtcars) + expect_equal(ncol(empty), 0) + expect_equal(nrow(empty), 32) +}) + +test_that("select excluding all vars returns nothing", { + expect_equal(dim(select(mtcars, -(mpg:carb))), c(32, 0)) + expect_equal(dim(select(mtcars, starts_with("x"))), c(32, 0)) + expect_equal(dim(select(mtcars, -matches("."))), c(32, 0)) +}) + +test_that("negating empty match returns everything", { + df <- data.table(x = 1:3, y = 3:1) + expect_equal(select(df, -starts_with("xyz")), df) +}) + +# Select variables ----------------------------------------------- + +test_that("select can be before group_by (#309)", { + df <- data.table( + id = c(1, 1, 2, 2, 2, 3, 3, 4, 4, 5), + year = c(2013, 2013, 2012, 2013, 2013, 2013, 2012, 2012, 2013, 2013), + var1 = rnorm(10) + ) + dfagg <- df %>% + group_by(id, year) %>% + select(id, year, var1) %>% + summarise(var1 = mean(var1)) + expect_equal(names(dfagg), c("id", "year", "var1")) +}) + +test_that("rename errors with invalid grouped data frame (#640)", { + skip_if_dtplyr() + df <- data.table(a = 1:3, b = 2:4, d = 3:5) %>% group_by(a, b) + df$a <- NULL + expect_error( + df %>% rename(e = d), + "not found in groups metadata" + ) + expect_error( + df %>% rename(e = b), + "not found in groups metadata" + ) +}) + +test_that("rename() handles data pronoun", { + expect_identical(rename(data.table(x = 1), y = .data$x), data.table(y = 1)) +}) + +test_that("select succeeds in presence of raw columns (#1803)", { + skip_if_dtplyr() + df <- data.table(a = 1:3, b = as.raw(1:3)) + expect_identical(select(df, a), df["a"]) + expect_identical(select(df, b), df["b"]) + expect_identical(select(df, -b), df["a"]) +}) + +test_that("arguments to select() don't match vars_select() arguments", { + skip_if_dtplyr() + df <- data.table(a = 1) + expect_identical(select(df, var = a), data.table(var = 1)) + expect_identical(select(group_by(df, a), var = a), group_by(data.table(var = 1), var)) + expect_identical(select(df, exclude = a), data.table(exclude = 1)) + expect_identical(select(df, include = a), data.table(include = 1)) + expect_identical(select(group_by(df, a), exclude = a), group_by(data.table(exclude = 1), exclude)) + expect_identical(select(group_by(df, a), include = a), group_by(data.table(include = 1), include)) +}) + +test_that("arguments to rename() don't match vars_rename() arguments (#2861)", { + skip_if_dtplyr() + df <- data.table(a = 1) + expect_identical(rename(df, var = a), data.table(var = 1)) + expect_identical(rename(group_by(df, a), var = a), group_by(data.table(var = 1), var)) + expect_identical(rename(df, strict = a), data.table(strict = 1)) + expect_identical(rename(group_by(df, a), strict = a), group_by(data.table(strict = 1), strict)) +}) + +test_that("can select() with .data pronoun (#2715)", { + expect_identical(select(mtcars, .data$cyl), select(mtcars, cyl)) +}) + +test_that("can select() with character vectors", { + expect_identical(select(mtcars, "cyl", !!"disp", c("cyl", "am", "drat")), mtcars[c("cyl", "disp", "am", "drat")]) +}) + +test_that("rename() to UTF-8 column names", { + skip_on_os("windows") # needs an rlang update? #3049 + df <- data.table(a = 1) %>% rename("\u5e78" := a) + + expect_equal(colnames(df), "\u5e78") +}) + +test_that("select() treats NULL inputs as empty", { + expect_identical(select(mtcars, cyl), select(mtcars, NULL, cyl, NULL)) +}) + +test_that("can select() or rename() with strings and character vectors", { + vars <- c(foo = "cyl", bar = "am") + + expect_identical(select(mtcars, !!!vars), select(mtcars, foo = cyl, bar = am)) + expect_identical(select(mtcars, !!vars), select(mtcars, foo = cyl, bar = am)) + + expect_identical(rename(mtcars, !!!vars), rename(mtcars, foo = cyl, bar = am)) + expect_identical(rename(mtcars, !!vars), rename(mtcars, foo = cyl, bar = am)) +}) + +test_that("select works on empty names (#3601)", { + df <- data.table(x=1, y=2, z=3) + colnames(df) <- c("x","y","") + expect_identical(select(df, x)$x, 1) + + colnames(df) <- c("","y","z") + expect_identical(select(df, y)$y, 2) +}) + +test_that("select works on NA names (#3601)", { + skip("to be discussed") + df <- data.table(x=1, y=2, z=3) + colnames(df) <- c("x","y",NA) + expect_identical(select(df, x)$x, 1) + + colnames(df) <- c(NA,"y","z") + expect_identical(select(df, y)$y, 2) +}) + + diff --git a/tests/testthat/test-slice.R b/tests/testthat/test-slice.R index 532993caf..4108ea992 100644 --- a/tests/testthat/test-slice.R +++ b/tests/testthat/test-slice.R @@ -13,3 +13,140 @@ test_that("slicing data table preserves input class", { expect_is(mtcars_dt %>% group_by(cyl) %>% slice(1), "grouped_dt") }) +context("slice (tests from dplyr)") + +mtcars_dt <- as.data.table(mtcars) + +test_that("slice handles numeric input (#226)", { + skip_if_dtplyr() + + g <- mtcars_dt %>% group_by(cyl) + res <- g %>% slice(1) + expect_equal(nrow(res), 3) + expect_equal(res, g %>% filter(row_number() == 1L)) + + expect_equal( + mtcars_dt %>% slice(1), + mtcars_dt %>% filter(row_number() == 1L) + ) +}) + +test_that("slice silently ignores out of range values (#226)", { + skip_if_dtplyr() + + expect_equal(slice(mtcars_dt, c(2, 100)), slice(mtcars_dt, 2)) + + g <- group_by(mtcars_dt, cyl) + expect_equal(slice(g, c(2, 100)), slice(g, 2)) + +}) + +test_that("slice works with 0 args", { + skip_if_dtplyr() + + expect_equivalent(slice(mtcars_dt), mtcars_dt) +}) + +test_that("slice works with negative indices", { + res <- slice(mtcars_dt, -(1:2)) + exp <- tail(mtcars_dt, -2) + expect_equal(names(res), names(exp)) + for (col in names(res)) { + expect_equal(res[[col]], exp[[col]]) + } +}) + +test_that("slice forbids positive and negative together", { + expect_error( + mtcars_dt %>% slice(c(-1, 2)), + "Cannot mix positives and negatives.", + fixed = TRUE + ) +}) + +test_that("slice works with grouped data", { + skip_if_dtplyr() + + g <- group_by(mtcars_dt, cyl) + + res <- slice(g, 1:2) + exp <- filter(g, row_number() < 3) + expect_equal(res, exp) + + res <- slice(g, -(1:2)) + exp <- filter(g, row_number() >= 3) + expect_equal(res, exp) + +}) + +test_that("slice gives correct rows (#649)", { + a <- tibble(value = paste0("row", 1:10)) + expect_equal(slice(a, 1:3)$value, paste0("row", 1:3)) + expect_equal(slice(a, c(4, 6, 9))$value, paste0("row", c(4, 6, 9))) + + a <- tibble( + value = paste0("row", 1:10), + group = rep(1:2, each = 5) + ) %>% + group_by(group) + + expect_equal(slice(a, 1:3)$value, paste0("row", c(1:3, 6:8))) + expect_equal(slice(a, c(2, 4))$value, paste0("row", c(2, 4, 7, 9))) +}) + +test_that("slice handles NA (#1235)", { + df <- tibble(x = 1:3) + expect_equal(nrow(slice(df, NA_integer_)), 0L) + expect_equal(nrow(slice(df, c(1L, NA_integer_))), 1L) + expect_equal(nrow(slice(df, c(-1L, NA_integer_))), 2L) + + df <- tibble(x = 1:4, g = rep(1:2, 2)) %>% group_by(g) + expect_equal(nrow(slice(df, NA)), 0L) + expect_equal(nrow(slice(df, c(1, NA))), 2) + expect_equal(nrow(slice(df, c(-1, NA))), 2) + +}) + +test_that("slice handles empty data frames (#1219)", { + df <- data.frame(x = numeric()) + res <- df %>% slice(1:3) + expect_equal(nrow(res), 0L) + expect_equal(names(res), "x") +}) + +test_that("slice works fine if n > nrow(df) (#1269)", { + skip_if_dtplyr() + + slice_res <- mtcars_dt %>% group_by(cyl) %>% slice(8) + filter_res <- mtcars_dt %>% group_by(cyl) %>% filter(row_number() == 8) + expect_equal(slice_res, filter_res) +}) + +test_that("slice strips grouped indices (#1405)", { + skip_if_dtplyr() + + res <- mtcars_dt %>% group_by(cyl) %>% slice(1) %>% mutate(mpgplus = mpg + 1) + expect_equal(nrow(res), 3L) + expect_equal(attr(res, "indices"), as.list(0:2)) +}) + +test_that("slice works with zero-column data frames (#2490)", { + expect_equal( + tibble(a = 1:3) %>% select(-a) %>% slice(1) %>% nrow, + 1L + ) +}) + +test_that("slice works under gctorture2", { + skip_if_dtplyr() + + x <- data.table(y = 1:10) + with_gctorture2(999, x2 <- slice(x, 1:10)) + expect_identical(x, x2) +}) + +test_that("slice correctly computes positive indices from negative indices (#3073)", { + x <- data.table(y = 1:10) + # data.table gives a warning here + expect_identical(suppressWarnings(slice(x, -10:-30)), data.table(y = 1:9)) +}) diff --git a/tests/testthat/test-summarise.R b/tests/testthat/test-summarise.R new file mode 100644 index 000000000..10584d974 --- /dev/null +++ b/tests/testthat/test-summarise.R @@ -0,0 +1,1060 @@ +context("Summarise (copied from dplyr") + +test_that("repeated outputs applied progressively", { + skip_if_dtplyr() + + df <- data.table(x = 5) + + out <- summarise(df, x = mean(x), x = x + 1) + expect_equal(nrow(out), 1) + expect_equal(ncol(out), 1) + + expect_equal(out$x, 6) +}) + +test_that("repeated outputs applied progressively (grouped_df)", { + skip_if_dtplyr() + + df <- data.table(x = c(1, 1), y = 1:2) + ds <- group_by(df, y) + out <- summarise(ds, z = mean(x), z = z + 1) + + expect_equal(nrow(out), 2) + expect_equal(ncol(out), 2) + + expect_equal(out$z, c(2L, 2L)) +}) + + +test_that("summarise peels off a single layer of grouping", { + df <- data.table(x = rep(1:4, each = 4), y = rep(1:2, each = 8), z = runif(16)) + grouped <- df %>% group_by(x, y, z) + expect_equal(group_vars(grouped), c("x", "y", "z")) + expect_equal(group_vars(grouped %>% summarise(n = n())), c("x", "y")) +}) + +test_that("summarise can refer to variables that were just created (#138)", { + skip_if_dtplyr() + + res <- summarise(tbl_dt(mtcars), cyl1 = mean(cyl), cyl2 = cyl1 + 1) + expect_equal(res$cyl2, mean(mtcars$cyl) + 1) + + gmtcars <- group_by(tbl_dt(mtcars), am) + res <- summarise(gmtcars, cyl1 = mean(cyl), cyl2 = cyl1 + 1) + res_direct <- summarise(gmtcars, cyl2 = mean(cyl) + 1) + expect_equal(res$cyl2, res_direct$cyl2) +}) + +test_that("summarise can refer to factor variables that were just created (#2217)", { + skip_if_dtplyr() + + df <- data.table(a = 1:3) %>% + group_by(a) + res <- df %>% + summarise(f = factor(if_else(a <= 1, "a", "b")), g = (f == "a")) + expect_equal( + res, + data.table(a = 1:3, f = factor(c("a", "b", "b")), g = c(TRUE, FALSE, FALSE)) + ) +}) + +test_that("summarise refuses to modify grouping variable (#143)", { + skip_if_dtplyr() + + df <- data.table(a = c(1, 2, 1, 2), b = c(1, 1, 2, 2), x = 1:4) + ds <- group_by(tbl_dt(df), a, b) + expect_error( + summarise(ds, a = mean(x), a = b + 1), + "Column `a` can't be modified because it's a grouping variable" + ) +}) + +test_that("summarise gives proper errors (#153)", { + skip_if_dtplyr() + + df <- data.table( + x = 1, + y = c(1, 2, 2), + z = runif(3) + ) + expect_error( + summarise(df, identity(NULL)), + "Column `identity(NULL)` is of unsupported type NULL", + fixed = TRUE + ) + expect_error( + summarise(df, log(z)), + "Column `log(z)` must be length 1 (a summary value), not 3", + fixed = TRUE + ) + expect_error( + summarise(df, y[1:2]), + "Column `y[1:2]` must be length 1 (a summary value), not 2", + fixed = TRUE + ) + expect_error( + summarise(df, env(a = 1)), + "Column `env(a = 1)` is of unsupported type environment", + fixed = TRUE + ) + + gdf <- group_by(df, x, y) + expect_error( + summarise(gdf, identity(NULL)), + "Column `identity(NULL)` is of unsupported type NULL", + fixed = TRUE + ) + expect_error( + summarise(gdf, z), + "Column `z` must be length 1 (a summary value), not 2", + fixed = TRUE + ) + expect_error( + summarise(gdf, log(z)), + "Column `log(z)` must be length 1 (a summary value), not 2", + fixed = TRUE + ) + expect_error( + summarise(gdf, y[1:2]), + "Column `y[1:2]` must be length 1 (a summary value), not 2", + fixed = TRUE + ) + expect_error( + summarise(gdf, env(a = 1)), + "Column `env(a = 1)` is of unsupported type environment", + fixed = TRUE + ) +}) + +test_that("summarise handles constants (#153)", { + df <- data.table(a = 1:4) + today <- Sys.Date() + now <- Sys.time() + + res <- summarise( + tbl_dt(df), + int = 1L, num = 1, str = "foo", bool = TRUE, date = today, time = now + ) + expect_equal(res$int, 1L) + expect_equal(res$num, 1.0) + expect_equal(res$str, "foo") + expect_equal(res$bool, TRUE) + expect_equal(res$date, today) + expect_equal(res$time, now) + + res <- summarise( + group_by(df, a), + int = 1L, num = 1, str = "foo", bool = TRUE, date = today, time = now + ) + expect_equal(res$int, rep(1L, 4)) + expect_equal(res$num, rep(1.0, 4)) + expect_equal(res$str, rep("foo", 4)) + expect_equal(res$bool, rep(TRUE, 4)) + expect_equal(res$date, rep(today, 4)) + expect_equal(res$time, rep(now, 4)) + +}) + +test_that("summarise handles passing ...", { + skip_if_dtplyr() + + df <- data.table(x = 1:4) + + f <- function(...) { + x1 <- 1 + f1 <- function(x) x + summarise(df, ..., x1 = f1(x1)) + } + g <- function(...) { + x2 <- 2 + f(x2 = x2, ...) + } + h <- function(before = "before", ..., after = "after") { + g(before = before, ..., after = after) + } + + res <- h(x3 = 3) + expect_equal(res$x1, 1) + expect_equal(res$x2, 2) + expect_equal(res$before, "before") + expect_equal(res$after, "after") + + df <- tbl_dt(df) + res <- h(x3 = 3) + expect_equal(res$x1, 1) + expect_equal(res$x2, 2) + expect_equal(res$before, "before") + expect_equal(res$after, "after") + + df <- group_by(df, x) + res <- h(x3 = 3) + expect_equal(res$x1, rep(1, 4)) + expect_equal(res$x2, rep(2, 4)) + expect_equal(res$before, rep("before", 4)) + expect_equal(res$after, rep("after", 4)) + +}) + +test_that("summarise propagate attributes (#194)", { + df <- data.table( + b = rep(1:2, 2), + f = Sys.Date() + 1:4, + g = Sys.time() + 1:4, + stringsAsFactors = FALSE + ) %>% + group_by(b) + + min_ <- min + res <- summarise(df, + min_f = min(f), + max_f = max(f), + min_g = min(g), + max_g = max(g), + min__f = min_(f), + min__g = min_(g) + ) + + expect_equal(class(res$min_f) , "Date") + expect_equal(class(res$max_f) , "Date") + expect_equal(class(res$min__f), "Date") + + expect_equal(class(res$min_g) , c("POSIXct", "POSIXt")) + expect_equal(class(res$max_g) , c("POSIXct", "POSIXt")) + expect_equal(class(res$min__g), c("POSIXct", "POSIXt")) + +}) + +test_that("summarise strips names, but only if grouped (#2231, #2675)", { + skip_if_dtplyr() + + data <- data.table(a = 1:3) %>% summarise(b = setNames(nm = a[[1]])) + expect_equal(names(data$b), "1") + + data <- data.table(a = 1:3) %>% rowwise %>% summarise(b = setNames(nm = a)) + expect_null(names(data$b)) + + data <- data.table(a = c(1, 1, 2)) %>% group_by(a) %>% summarise(b = setNames(nm = a[[1]])) + expect_null(names(data$b)) +}) + +test_that("summarise fails on missing variables", { + # error messages from rlang + expect_error(summarise(mtcars, a = mean(notthear))) +}) + +test_that("summarise fails on missing variables when grouping (#2223)", { + # error messages from rlang + expect_error(summarise(group_by(mtcars, cyl), a = mean(notthear))) +}) + +test_that("n() does not accept arguments", { + expect_error( + summarise(group_by(as.data.table(mtcars), cyl), n(hp)), + "unused argument", + fixed = TRUE + ) +}) + +test_that("hybrid nests correctly", { + res <- group_by(mtcars, cyl) %>% + summarise(a = if (n() > 10) 1 else 2) + expect_equal(res$a, c(1, 2, 1)) + + res <- mtcars %>% summarise(a = if (n() > 10) 1 else 2) + expect_equal(res$a, 1) +}) + +test_that("hybrid min and max propagate attributes (#246)", { + x <- data.table( + id = c(rep("a", 2), rep("b", 2)), + date = as.POSIXct(c("2014-01-13", "2014-01-14", "2014-01-15", "2014-01-16"), tz = "GMT") + ) + y <- x %>% group_by(id) %>% summarise(max_date = max(date), min_date = min(date)) + + expect_true("tzone" %in% names(attributes(y$min_date))) + expect_true("tzone" %in% names(attributes(y$max_date))) +}) + +test_that("summarise can use newly created variable more than once", { + skip_if_dtplyr() + + df <- data.table(id = c(1, 1, 2, 2, 3, 3), a = 1:6) %>% group_by(id) + for (i in 1:10) { + res <- summarise( + df, + biggest = max(a), + smallest = min(a), + diff1 = biggest - smallest, + diff2 = smallest - biggest + ) + expect_equal(res$diff1, -res$diff2) + } +}) + +test_that("summarise creates an empty data frame when no parameters are used", { + skip_if_dtplyr() + + res <- summarise(mtcars) + expect_equal(res, data.table()) +}) + +test_that("integer overflow (#304)", { + skip_if_dtplyr() + + groups <- rep(c("A", "B"), each = 3) + values <- rep(1e9, 6) + dat <- data.table(groups, X1 = as.integer(values), X2 = values) + # now group and summarise + expect_warning( + res <- group_by(dat, groups) %>% + summarise(sum_integer = sum(X1), sum_numeric = sum(X2)), + "integer overflow" + ) + expect_true(all(is.na(res$sum_integer))) + expect_equal(res$sum_numeric, rep(3e9, 2L)) +}) + +test_that("summarise checks outputs (#300)", { + expect_error( + summarise(mtcars, mpg, cyl), + "Column `mpg` must be length 1 (a summary value), not 32", + fixed = TRUE + ) + expect_error( + summarise(mtcars, mpg + cyl), + "Column `mpg + cyl` must be length 1 (a summary value), not 32", + fixed = TRUE + ) +}) + +test_that("comment attribute is white listed (#346)", { + test <- data.table(A = c(1, 1, 0, 0), B = c(2, 2, 3, 3)) + comment(test$B) <- "2nd Var" + res <- group_by(test, A) + expect_equal(comment(res$B), "2nd Var") +}) + +test_that("AsIs class is white listed (#453)", { + test <- data.table(A = c(1, 1, 0, 0), B = I(c(2, 2, 3, 3))) + res <- group_by(test, B) + expect_equal(res$B, test$B) +}) + +test_that("names attribute is not retained (#357)", { + df <- data.table(x = c(1:3), y = letters[1:3]) + df <- group_by(df, y) + m <- df %>% summarise( + a = length(x), + b = quantile(x, 0.5) + ) + expect_equal(m$b, c(1, 2, 3)) + expect_null(names(m$b)) +}) + +test_that("na.rm is supported (#168)", { + df <- data.table( + x = c(1:5, NA, 7:10), + y = rep(1:2, each = 5), + z = c(rnorm(5), NA, rnorm(4)) + ) + res <- df %>% + group_by(y) %>% + summarise( + mean_x = mean(x, na.rm = FALSE), + mean_z = mean(z, na.rm = FALSE), + min_x = min(x, na.rm = FALSE), + min_z = min(z, na.rm = FALSE) + ) + expect_equal(res$mean_x[1], 3) + expect_true(is.na(res$mean_x[2])) + expect_equal(res$mean_z[1], mean(df$z[1:5])) + expect_true(is.na(res$mean_z[2])) + + expect_equal(res$min_x[1], 1) + expect_true(is.na(res$min_x[2])) + expect_equal(res$min_z[1], min(df$z[1:5])) + expect_true(is.na(res$min_z[2])) + + res <- df %>% + group_by(y) %>% + summarise( + mean_x = mean(x, na.rm = TRUE), + mean_z = mean(z, na.rm = TRUE), + min_x = min(x, na.rm = TRUE), + min_z = min(z, na.rm = TRUE) + ) + expect_equal(res$mean_x[1], 3) + expect_equal(res$mean_x[2], 8.5) + expect_equal(res$mean_z[1], mean(df$z[1:5])) + expect_equal(res$mean_z[2], mean(df$z[7:10])) + + expect_equal(res$min_x[1], 1) + expect_equal(res$min_x[2], 7) + expect_equal(res$min_z[1], min(df$z[1:5])) + expect_equal(res$min_z[2], min(df$z[7:10])) + +}) + +test_that("summarise hybrid functions can use summarized variables", { + skip_if_dtplyr() + + df <- data.table(x = c(1:5, NA, 7:10), y = rep(1:2, each = 5)) %>% group_by(y) + res <- summarise( + df, + x = mean(x), + min = min(x), + max = max(x), + mean = mean(x), + var = var(x) + ) + expect_identical(res$x, res$min) + expect_identical(res$x, res$max) + expect_identical(res$x, res$mean) + expect_identical(res$var, rep(NA_real_, 2)) +}) + +test_that("LazySubset is not confused about input data size (#452)", { + res <- data.table(a = c(10, 100)) %>% summarise(b = sum(a), c = sum(a) * 2) + expect_equal(res$b, 110) + expect_equal(res$c, 220) +}) + +test_that("nth, first, last promote dates and times (#509)", { + data <- data.table( + ID = rep(letters[1:4], each = 5), + date = Sys.Date() + 1:20, + time = Sys.time() + 1:20, + number = rnorm(20) + ) + res <- data %>% + group_by(ID) %>% + summarise( + date2 = nth(date, 2), + time2 = nth(time, 2), + first_date = first(date), + last_date = last(date), + first_time = first(time), + last_time = last(time) + ) + expect_is(res$date2, "Date") + expect_is(res$first_date, "Date") + expect_is(res$last_date, "Date") + expect_is(res$time2, "POSIXct") + expect_is(res$first_time, "POSIXct") + expect_is(res$last_time, "POSIXct") + # error messages from rlang + expect_error(data %>% group_by(ID) %>% summarise(time2 = nth(times, 2))) +}) + +test_that("nth, first, last preserves factor data (#509)", { + skip_if_dtplyr() + + dat <- data.table(a = rep(seq(1, 20, 2), 3), b = as.ordered(a)) + dat1 <- dat %>% + group_by(a) %>% + summarise( + der = nth(b, 2), + first = first(b), + last = last(b) + ) + expect_is(dat1$der, "ordered") + expect_is(dat1$first, "ordered") + expect_is(dat1$last, "ordered") + expect_equal(levels(dat1$der), levels(dat$b)) +}) + +test_that("nth handle negative value (#1584) ", { + skip_if_dtplyr() + + df <- data.table( + a = 1:10, b = 10:1, + g = rep(c(1, 2), c(4, 6)) + ) %>% + group_by(g) + + res <- summarise( + df, + x1 = nth(a, -1L), + x2 = nth(a, -1L, order_by = b), + x3 = nth(a, -5L), + x4 = nth(a, -5L, order_by = b), + x5 = nth(a, -5L, default = 99), + x6 = nth(a, -5L, order_by = b, default = 99) + ) + expect_equal(res$x1, c(4, 10)) + expect_equal(res$x2, c(1, 5)) + expect_true(is.na(res$x3[1])) + expect_equal(res$x3[2], 6) + expect_true(is.na(res$x4[1])) + expect_equal(res$x4[2], 9) + expect_equal(res$x5, c(99, 6)) + expect_equal(res$x6, c(99, 9)) + +}) + +test_that("LazyGroupSubsets is robust about columns not from the data (#600)", { + foo <- data.table(x = 1:10, y = 1:10) + # error messages from rlang + expect_error(foo %>% group_by(x) %>% summarise(first_y = first(z))) +}) + +test_that("can summarise first(x[-1]) (#1980)", { + skip_if_dtplyr() + + expect_equal( + tbl_dt(x = 1:3) %>% summarise(f = first(x[-1])), + tbl_dt(f = 2L) + ) +}) + +test_that("hybrid eval handles $ and @ (#645)", { + tmp <- expand.grid(a = 1:3, b = 0:1, i = 1:10) + g <- tmp %>% group_by(a) + + f <- function(a, b) { + list(x = 1:10) + } + + res <- g %>% summarise( + r = sum(b), + n = length(b), + p = f(r, n)$x[1] + ) + expect_equal(names(res), c("a", "r", "n", "p")) + + res <- tmp %>% summarise( + r = sum(b), + n = length(b), + p = f(r, n)$x[1] + ) + expect_equal(names(res), c("r", "n", "p")) + +}) + +test_that("argument order_by in last is flexible enough to handle more than just a symbol (#626)", { + skip_if_dtplyr() + + res1 <- group_by(mtcars, cyl) %>% + summarise( + big = last(mpg[drat > 3], order_by = wt[drat > 3]), + small = first(mpg[drat > 3], order_by = wt[drat > 3]), + second = nth(mpg[drat > 3], 2, order_by = wt[drat > 3]) + ) + + # turning off lazy eval + last. <- last + first. <- first + nth. <- nth + res2 <- group_by(mtcars, cyl) %>% + summarise( + big = last.(mpg[drat > 3], order_by = wt[drat > 3]), + small = first.(mpg[drat > 3], order_by = wt[drat > 3]), + second = nth.(mpg[drat > 3], 2, order_by = wt[drat > 3]) + ) + expect_equal(res1, res2) + +}) + +test_that("min(., na.rm=TRUE) correctly handles Dates that are coded as REALSXP (#755)", { + dates <- as.Date(c("2014-01-01", "2013-01-01")) + dd <- data.table(Dates = dates) + res <- summarise(dd, Dates = min(Dates, na.rm = TRUE)) + expect_is(res$Dates, "Date") + expect_equal(res$Dates, as.Date("2013-01-01")) +}) + +test_that("nth handles expressions for n argument (#734)", { + df <- data.table(x = c(1:4, 7:9, 13:19), y = sample(100:999, 14)) + idx <- which(df$x == 16) + res <- df %>% summarize(abc = nth(y, n = which(x == 16))) + expect_equal(res$abc, df$y[idx]) +}) + +test_that("summarise is not polluted by logical NA (#599)", { + skip_if_dtplyr() + + dat <- data.table(grp = rep(1:4, each = 2), val = c(NA, 2, 3:8)) + Mean <- function(x, thresh = 2) { + res <- mean(x, na.rm = TRUE) + if (res > thresh) res else NA + } + res <- dat %>% group_by(grp) %>% summarise(val = Mean(val, thresh = 2)) + expect_is(res$val, "numeric") + expect_true(is.na(res$val[1])) +}) + +test_that("summarise handles list output columns (#832)", { + skip_if_dtplyr() + + df <- data.table(x = 1:10, g = rep(1:2, each = 5)) + res <- df %>% group_by(g) %>% summarise(y = list(x)) + expect_equal(res$y[[1]], 1:5) + expect_equal(res$y[[2]], 6:10) + # just checking objects are not messed up internally + expect_equal(gp(res$y[[1]]), 0L) + expect_equal(gp(res$y[[2]]), 0L) + + res <- df %>% group_by(g) %>% summarise(y = list(x + 1)) + expect_equal(res$y[[1]], 1:5 + 1) + expect_equal(res$y[[2]], 6:10 + 1) + # just checking objects are not messed up internally + expect_equal(gp(res$y[[1]]), 0L) + expect_equal(gp(res$y[[2]]), 0L) + + df <- data.table(x = 1:10, g = rep(1:2, each = 5)) + res <- df %>% summarise(y = list(x)) + expect_equal(res$y[[1]], 1:10) + res <- df %>% summarise(y = list(x + 1)) + expect_equal(res$y[[1]], 1:10 + 1) + +}) + +test_that("summarise works with empty data frame (#1142)", { + skip_if_dtplyr() + + df <- data.table() + res <- df %>% summarise + expect_equal(nrow(res), 0L) + expect_equal(length(res), 0L) +}) + +test_that("n_distint uses na.rm argument", { + df <- data.table(x = c(1:3, NA), g = rep(1:2, 2)) + res <- summarise(df, n = n_distinct(x, na.rm = TRUE)) + expect_equal(res$n, 3L) + + res <- group_by(df, g) %>% summarise(n = n_distinct(x, na.rm = TRUE)) + expect_equal(res$n, c(2L, 1L)) + +}) + +test_that("n_distinct front end supports na.rm argument (#1052)", { + x <- c(1:3, NA) + expect_equal(n_distinct(x, na.rm = TRUE), 3L) +}) + +test_that("n_distinct without arguments stops (#1957)", { + expect_error( + n_distinct(), + "Need at least one column for `n_distinct()`", + fixed = TRUE + ) +}) + +test_that("hybrid evaluation does not take place for objects with a class (#1237)", { + mean.foo <- function(x) 42 + df <- data.table(x = structure(1:10, class = "foo")) + expect_equal(summarise(df, m = mean(x))$m[1], 42) + + env <- environment() + Foo <- suppressWarnings(setClass("Foo", contains = "numeric", where = env)) + suppressMessages(setMethod("mean", "Foo", function(x, ...) 42, where = env)) + on.exit(removeClass("Foo", where = env)) + + df <- data.table(x = Foo(c(1, 2, 3))) + expect_equal(summarise(df, m = mean(x))$m[1], 42) +}) + +test_that("summarise handles promotion of results (#893)", { + skip_if_dtplyr() + + df <- structure(list( + price = c(580L, 650L, 630L, 706L, 1080L, 3082L, 3328L, 4229L, 1895L, + 3546L, 752L, 13003L, 814L, 6115L, 645L, 3749L, 2926L, 765L, + 1140L, 1158L), + cut = structure(c(2L, 4L, 4L, 2L, 3L, 2L, 2L, 3L, 4L, 1L, 1L, 3L, 2L, + 4L, 3L, 3L, 1L, 2L, 2L, 2L), + .Label = c("Good", "Ideal", "Premium", "Very Good"), + class = "factor")), + row.names = c(NA, -20L), + .Names = c("price", "cut"), + class = "data.table" + ) + res <- df %>% + group_by(cut) %>% + select(price) %>% + summarise(price = median(price)) + expect_is(res$price, "numeric") + +}) + +test_that("summarise correctly handles logical (#1291)", { + test <- expand.grid(id = 1:2, type = letters[1:2], sample = 1:2) %>% + mutate(var = c(1, 0, 1, 1, 0, 0, 0, 1)) %>% + mutate(var_l = as.logical(var)) %>% + mutate(var_ch = as.character(var_l)) %>% + arrange(id, type, sample) %>% + group_by(id, type) + test_sum <- test %>% + ungroup() %>% + group_by(id, type) %>% + summarise( + anyvar = any(var == 1), + anyvar_l = any(var_l), + anyvar_ch = any(var_ch == "TRUE") + ) + + expect_equal(test_sum$anyvar, c(TRUE, TRUE, FALSE, TRUE)) + +}) + +test_that("summarise correctly handles NA groups (#1261)", { + tmp <- data.table( + a = c(1, 1, 1, 2, 2), + b1 = NA_integer_, + b2 = NA_character_ + ) + + res <- tmp %>% group_by(a, b1) %>% summarise(n()) + expect_equal(nrow(res), 2L) + res <- tmp %>% group_by(a, b2) %>% summarise(n()) + expect_equal(nrow(res), 2L) +}) + +test_that("n_distinct handles multiple columns (#1084)", { + df <- data.table( + x = rep(1:4, each = 2), + y = rep(1:2, each = 4), + g = rep(1:2, 4) + ) + res <- summarise(df, n = n_distinct(x, y)) + expect_equal(res$n, 4L) + + res <- group_by(df, g) %>% summarise(n = n_distinct(x, y)) + expect_equal(res$n, c(4L, 4L)) + + df$x[3] <- df$y[7] <- NA + res <- summarise(df, n = n_distinct(x, y)) + expect_equal(res$n, 6L) + res <- summarise(df, n = n_distinct(x, y, na.rm = TRUE)) + expect_equal(res$n, 4L) + + res <- group_by(df, g) %>% summarise(n = n_distinct(x, y)) + expect_equal(res$n, c(4L, 4L)) + + res <- group_by(df, g) %>% summarise(n = n_distinct(x, y, na.rm = TRUE)) + expect_equal(res$n, c(2L, 4L)) +}) + +test_that("hybrid max works when not used on columns (#1369)", { + df <- data.table(x = 1:1000) + y <- 1:10 + expect_equal(summarise(df, z = max(y))$z, 10) + expect_equal(summarise(df, z = max(10))$z, 10) +}) + +test_that("min and max handle empty sets in summarise (#1481)", { + skip_if_dtplyr() + + df <- data.table(A = numeric()) + res <- df %>% summarise(Min = min(A, na.rm = TRUE), Max = max(A, na.rm = TRUE)) + expect_equal(res$Min, Inf) + expect_equal(res$Max, -Inf) +}) + +test_that("lead and lag behave correctly in summarise (#1434)", { + skip_if_dtplyr() + res <- mtcars %>% + group_by(cyl) %>% + summarise( + n = n(), + leadn = lead(n), + lagn = lag(n), + leadn10 = lead(n, default = 10), + lagn10 = lag(n, default = 10) + ) + expect_true(all(is.na(res$lagn))) + expect_true(all(is.na(res$leadn))) + expect_true(all(res$lagn10 == 10)) + expect_true(all(res$leadn10 == 10)) + + res <- mtcars %>% + rowwise() %>% + summarise( + n = n(), + leadn = lead(n), + lagn = lag(n), + leadn10 = lead(n, default = 10), + lagn10 = lag(n, default = 10) + ) + expect_true(all(is.na(res$lagn))) + expect_true(all(is.na(res$leadn))) + expect_true(all(res$lagn10 == 10)) + expect_true(all(res$leadn10 == 10)) + +}) + +# .data and .env tests now in test-hybrid-traverse.R + +test_that("data.table columns are supported in summarise (#1425)", { + skip_if_dtplyr() + + df <- data.table(x1 = rep(1:3, times = 3), x2 = 1:9) + df$x3 <- df %>% mutate(x3 = x2) + res <- df %>% group_by(x1) %>% summarise(nr = nrow(x3)) + expect_true(all(res$nr == 3)) +}) + +test_that("summarise handles min/max of already summarised variable (#1622)", { + skip_if_dtplyr() + + df <- data.table( + FIRST_DAY = rep(seq(as.POSIXct("2015-12-01", tz = "UTC"), length.out = 2, by = "days"), 2), + event = c("a", "a", "b", "b") + ) + + df_summary <- df %>% + group_by(event) %>% + summarise(FIRST_DAY = min(FIRST_DAY), LAST_DAY = max(FIRST_DAY)) + expect_equal(df_summary$FIRST_DAY, df_summary$LAST_DAY) +}) + +test_that("group_by keeps classes (#1631)", { + df <- data.table(a = 1, b = as.Date(NA)) %>% + group_by(a) %>% + summarize(c = min(b)) + expect_equal(class(df$c), "Date") + + df <- data.table(a = 1, b = as.POSIXct(NA)) %>% + group_by(a) %>% + summarize(c = min(b)) + expect_equal(class(df$c), c("POSIXct", "POSIXt")) + +}) + +test_that("hybrid n_distinct falls back to R evaluation when needed (#1657)", { + dat3 <- data.table(id = c(2, 6, 7, 10, 10)) + res <- dat3 %>% summarise(n_unique = n_distinct(id[id > 6])) + expect_equal(res$n_unique, 2) +}) + +test_that("summarise() correctly coerces factors with different levels (#1678)", { + skip_if_dtplyr() + + res <- data.table(x = 1:3) %>% + group_by(x) %>% + summarise( + y = if (x == 1) "a" else "b", + z = factor(y) + ) + expect_is(res$z, "factor") + expect_equal(levels(res$z), c("a", "b")) + expect_equal(as.character(res$z), c("a", "b", "b")) +}) + +test_that("summarise works if raw columns exist but are not involved (#1803)", { + df <- data.table(a = 1:3, b = as.raw(1:3)) + expect_equal(summarise(df, c = sum(a)), data.table(c = 6L)) +}) + +test_that("summarise fails gracefully on raw columns (#1803)", { + skip_if_dtplyr() + + df <- data.table(a = 1:3, b = as.raw(1:3)) + expect_error( + summarise(df, c = b[[1]]), + "Column `c` is of unsupported type raw vector", + fixed = TRUE + ) +}) + +test_that("dim attribute is stripped from grouped summarise (#1918)", { + skip_if_dtplyr() + + df <- data.table(a = 1:3, b = 1:3) + + df_regular <- summarise(df, b = scale(b)[1, 1]) + df_grouped <- summarise(group_by(df, a), b = scale(b)) + df_rowwise <- summarise(rowwise(df), b = scale(b)) + + expect_null(dim(df$b)) + expect_null(dim(df_grouped$b)) + expect_null(dim(df_rowwise$b)) +}) + +test_that("typing and NAs for grouped summarise (#1839)", { + skip_if_dtplyr() + + expect_identical( + data.table(id = 1L, a = NA_character_) %>% + group_by(id) %>% + summarise(a = a[[1]]) %>% + .$a, + NA_character_) + + expect_identical( + data.table(id = 1:2, a = c(NA, "a")) %>% + group_by(id) %>% + summarise(a = a[[1]]) %>% + .$a, + c(NA, "a")) + + # Properly upgrade NA (logical) to character + expect_identical( + data.table(id = 1:2, a = 1:2) %>% + group_by(id) %>% + summarise(a = ifelse(all(a < 2), NA, "yes")) %>% + .$a, + c(NA, "yes")) + + expect_error( + data.table(id = 1:2, a = list(1, "2")) %>% + group_by(id) %>% + summarise(a = a[[1]]) %>% + .$a, + # "Column `a` can't promote group 1 to numeric", + fixed = TRUE + ) + + expect_identical( + data.table(id = 1:2, a = list(1, "2")) %>% + group_by(id) %>% + summarise(a = a[1]) %>% + .$a, + list(1, "2")) +}) + +test_that("typing and NAs for rowwise summarise (#1839)", { + skip_if_dtplyr() + + expect_identical( + data.table(id = 1L, a = NA_character_) %>% + rowwise %>% + summarise(a = a[[1]]) %>% + .$a, + NA_character_) + + expect_identical( + data.table(id = 1:2, a = c(NA, "a")) %>% + rowwise %>% + summarise(a = a[[1]]) %>% + .$a, + c(NA, "a")) + + # Properly promote NA (logical) to character + expect_identical( + data.table(id = 1:2, a = 1:2) %>% + group_by(id) %>% + summarise(a = ifelse(all(a < 2), NA, "yes")) %>% + .$a, + c(NA, "yes")) + + expect_error( + data.table(id = 1:2, a = list(1, "2")) %>% + rowwise %>% + summarise(a = a[[1]]) %>% + .$a, + "Column `a` can't promote group 1 to numeric", + fixed = TRUE + ) + + expect_error( + data.table(id = 1:2, a = list(1, "2")) %>% + rowwise %>% + summarise(a = a[1]) %>% + .$a, + "Column `a` can't promote group 1 to numeric", + fixed = TRUE + ) +}) + +test_that("calculating an ordered factor preserves order (#2200)", { + skip_if_dtplyr() + + test_df <- data.table( + id = c("a", "b"), + val = 1:2 + ) + + ret <- group_by(test_df, id) %>% + summarize(level = ordered(val)) + + expect_s3_class(ret$level, "ordered") + expect_equal(levels(ret$level), c("1", "2")) +}) + +test_that("min, max preserves ordered factor data (#2200)", { + skip_if_dtplyr() + + test_df <- tbl_dt( + id = rep(c("a", "b"), 2), + ord = ordered(c("A", "B", "B", "A"), levels = c("A", "B")) + ) + + ret <- group_by(test_df, id) %>% + summarize( + min_ord = min(ord), + max_ord = max(ord) + ) + + expect_s3_class(ret$min_ord, "ordered") + expect_s3_class(ret$max_ord, "ordered") + expect_equal(levels(ret$min_ord), levels(test_df$ord)) + expect_equal(levels(ret$max_ord), levels(test_df$ord)) +}) + +test_that("ungrouped summarise() uses summary variables correctly (#2404)", { + skip_if_dtplyr() + + df <- tbl_dt(seq(1:10)) + + out <- df %>% summarise(value = mean(value), sd = sd(value)) + expect_equal(out$value, 5.5) + expect_equal(out$sd, NA_real_) +}) + +test_that("proper handling of names in summarised list columns (#2231)", { + d <- data.table(x = rep(1:3, 1:3), y = 1:6, names = letters[1:6]) + res <- d %>% group_by(x) %>% summarise(y = list(setNames(y, names))) + expect_equal(names(res$y[[1]]), letters[[1]]) + expect_equal(names(res$y[[2]]), letters[2:3]) + expect_equal(names(res$y[[3]]), letters[4:6]) +}) + +test_that("proper handling of NA factors (#2588)", { + skip_if_dtplyr() + + df <- tbl_dt( + x = c(1, 1, 2, 2, 3, 3), + y = factor(c(NA, NA, NA, "2", "3", "3")) + ) + + ret <- df %>% group_by(x) %>% summarise(y = y[1]) + expect_identical(as.character(ret$y), c(NA, NA, "3")) +}) + +test_that("can refer to previously summarised symbols", { + skip_if_dtplyr() + + expect_identical(summarise(group_by(mtcars, cyl), x = 1, z = x)[2:3], tbl_dt(x = c(1, 1, 1), z = x)) + expect_identical(summarise(group_by(mtcars, cyl), x = n(), z = x)[2:3], tbl_dt(x = c(11L, 7L, 14L), z = x)) +}) + +test_that("can refer to symbols if group size is one overall", { + skip_if_dtplyr() + + df <- tbl_dt(x = LETTERS[3:1], y = 1:3) + expect_identical( + df %>% + group_by(x) %>% + summarise(z = y), + tbl_dt(x = LETTERS[1:3], z = 3:1) + ) +}) + +test_that("summarise() supports unquoted values", { + skip_if_dtplyr() + + df <- tbl_dt(g = c(1, 1, 2, 2, 2), x = 1:5) + expect_identical(summarise(df, out = !! 1), tbl_dt(out = 1)) + expect_identical(summarise(df, out = !! quote(identity(1))), tbl_dt(out = 1)) + expect_error(summarise(df, out = !! 1:2), "must be length 1 (the number of groups)", fixed = TRUE) + expect_error(summarise(df, out = !! env(a = 1)), "unsupported type") + + gdf <- group_by(df, g) + expect_identical(summarise(gdf, out = !! 1), summarise(gdf, out = 1)) + expect_identical(summarise(gdf, out = !! 1:2), tbl_dt(g = c(1, 2), out = 1:2)) + expect_identical(summarise(gdf, out = !! quote(identity(1))), summarise(gdf, out = 1)) + expect_error(summarise(gdf, out = !! 1:5), "must be length 2 (the number of groups)", fixed = TRUE) + expect_error(summarise(gdf, out = !! env(a = 1)), "unsupported type") +})