|
| 1 | +test_that("duckdb n_distinct() basic", { |
| 2 | +  # DUCKPLYR_FORCE stays set only until this test finishes. |
| 3 | +  withr::local_envvar(DUCKPLYR_FORCE = TRUE) |
| 4 | + |
| 5 | +  input <- duckdb_tibble(a = c(1, 1, 2, 2, 2), b = c(3, 3, NA, 3, 3)) |
| 6 | + |
| 7 | +  # `a` has no NA, so na.rm should not matter; `b` is a single NA among 3s. |
| 8 | +  counts <- input %>% |
| 9 | +    summarise( |
| 10 | +      n_distinct_a = n_distinct(a), |
| 11 | +      n_distinct_a_na_rm = n_distinct(a, na.rm = TRUE), |
| 12 | +      n_distinct_b = n_distinct(b, na.rm = FALSE), |
| 13 | +      n_distinct_b_na_rm = n_distinct(b, na.rm = TRUE) |
| 14 | +    ) |
| 15 | + |
| 16 | +  expect_equal(counts$n_distinct_a, 2) |
| 17 | +  expect_equal(counts$n_distinct_a_na_rm, 2) |
| 18 | +  expect_equal(counts$n_distinct_b, 2) |
| 19 | +  expect_equal(counts$n_distinct_b_na_rm, 1) |
| 20 | +}) |
| 21 | + |
| 22 | + |
| 23 | +test_that("duckdb n_distinct() counts empty inputs", { |
| 24 | +  withr::local_envvar(DUCKPLYR_FORCE = TRUE) |
| 25 | + |
| 26 | +  # Zero-length columns of four different types; every count should be 0. |
| 27 | +  empty <- duckdb_tibble( |
| 28 | +    a = integer(), |
| 29 | +    b = double(), |
| 30 | +    c = logical(), |
| 31 | +    d = character() |
| 32 | +  ) |
| 33 | +  counts <- empty %>% |
| 34 | +    summarise( |
| 35 | +      n_distinct_a = n_distinct(a), |
| 36 | +      n_distinct_b = n_distinct(b), |
| 37 | +      n_distinct_c = n_distinct(c), |
| 38 | +      n_distinct_d = n_distinct(d) |
| 39 | +    ) |
| 40 | +  expect_equal(counts$n_distinct_a, 0) |
| 41 | +  expect_equal(counts$n_distinct_b, 0) |
| 42 | +  expect_equal(counts$n_distinct_c, 0) |
| 43 | +  expect_equal(counts$n_distinct_d, 0) |
| 44 | +}) |
| 45 | + |
| 46 | + |
| 47 | +test_that("duckdb n_distinct() counts unique values in simple vectors", { |
| 48 | +  withr::local_envvar(DUCKPLYR_FORCE = TRUE) |
| 49 | + |
| 50 | +  # Two distinct values plus one NA per column; NA is expected to be counted. |
| 51 | +  values <- duckdb_tibble( |
| 52 | +    a = c(TRUE, FALSE, NA), |
| 53 | +    b = c(1, 2, NA), |
| 54 | +    c = c(1L, 2L, NA), |
| 55 | +    d = c("x", "y", NA) |
| 56 | +  ) |
| 57 | +  counts <- values %>% |
| 58 | +    summarise( |
| 59 | +      n_distinct_a = n_distinct(a), |
| 60 | +      n_distinct_b = n_distinct(b), |
| 61 | +      n_distinct_c = n_distinct(c), |
| 62 | +      n_distinct_d = n_distinct(d) |
| 63 | +    ) |
| 64 | +  expect_equal(counts$n_distinct_a, 3) |
| 65 | +  expect_equal(counts$n_distinct_b, 3) |
| 66 | +  expect_equal(counts$n_distinct_c, 3) |
| 67 | +  expect_equal(counts$n_distinct_d, 3) |
| 68 | +}) |
| 69 | + |
| 70 | + |
| 71 | +test_that("duckdb n_distinct() can drop missing values", { |
| 72 | +  withr::local_envvar(DUCKPLYR_FORCE = TRUE) |
| 73 | + |
| 74 | +  # `a` is one NA next to a length-2 `b`; presumably recycled -- TODO confirm. |
| 75 | +  with_na <- duckdb_tibble(a = NA, b = c(NA, 0)) |
| 76 | + |
| 77 | +  counts <- with_na %>% |
| 78 | +    summarise( |
| 79 | +      n_distinct_a = n_distinct(a, na.rm = TRUE), |
| 80 | +      n_distinct_b = n_distinct(b, na.rm = TRUE) |
| 81 | +    ) |
| 82 | + |
| 83 | +  # With NAs dropped, `a` has nothing left and `b` keeps only 0. |
| 84 | +  expect_equal(counts$n_distinct_a, 0) |
| 85 | +  expect_equal(counts$n_distinct_b, 1) |
| 86 | +}) |
| 87 | + |
| 88 | + |
| 89 | +test_that("duckdb n_distinct() counts NA correctly", { |
| 90 | +  withr::local_envvar(DUCKPLYR_FORCE = TRUE) |
| 91 | + |
| 92 | +  repeated_na <- duckdb_tibble( |
| 93 | +    a = c(1, NA, 1, NA, 2, NA, 2), |
| 94 | +    b = c(3, 3, NA, 3, NA, 4, 5) |
| 95 | +  ) |
| 96 | +  # Repeated NAs should add exactly one to the count unless na.rm = TRUE. |
| 97 | +  counts <- repeated_na %>% |
| 98 | +    summarise( |
| 99 | +      n_distinct_a = n_distinct(a), |
| 100 | +      n_distinct_a_na_rm = n_distinct(a, na.rm = TRUE), |
| 101 | +      n_distinct_b = n_distinct(b, na.rm = FALSE), |
| 102 | +      n_distinct_b_na_rm = n_distinct(b, na.rm = TRUE) |
| 103 | +    ) |
| 104 | +  expect_equal(counts$n_distinct_a, 3) |
| 105 | +  expect_equal(counts$n_distinct_a_na_rm, 2) |
| 106 | +  expect_equal(counts$n_distinct_b, 4) |
| 107 | +  expect_equal(counts$n_distinct_b_na_rm, 3) |
| 108 | +}) |
| 109 | + |
| 110 | + |
| 111 | +test_that("duckdb n_distinct() error with more than one argument", { |
| 112 | +  # DUCKPLYR_FORCE stays set only until this test finishes. |
| 113 | +  withr::local_envvar(DUCKPLYR_FORCE = TRUE) |
| 114 | + |
| 115 | +  df <- duckdb_tibble(a = c(1, 1, 2, 2, 2), b = c(3, 3, NA, 3, 3)) |
| 116 | + |
| 117 | +  # Two columns are passed to n_distinct(); the snapshot records the code in |
| 118 | +  # the braces together with the error it raises. |
| 119 | +  expect_snapshot(error = TRUE, { |
| 120 | +    df %>% summarise(dummy = n_distinct(a, b)) |
| 121 | +  }) |
| 122 | +}) |
| 123 | + |
| 124 | + |
| 125 | +test_that("duckdb n_distinct() error with na.rm not being TRUE/FALSE", { |
| 126 | +  # DUCKPLYR_FORCE stays set only until this test finishes. |
| 127 | +  withr::local_envvar(DUCKPLYR_FORCE = TRUE) |
| 128 | + |
| 129 | +  df <- duckdb_tibble(a = c(1, 2)) |
| 130 | + |
| 131 | +  # na.rm is a string here, not TRUE/FALSE; the error goes into the snapshot. |
| 132 | +  expect_snapshot(error = TRUE, { |
| 133 | +    df %>% summarise(dummy = n_distinct(a, na.rm = "b")) |
| 134 | +  }) |
| 135 | +}) |
| 136 | + |
| 137 | + |
| 138 | +test_that("duckdb n_distinct() error with mutate", { |
| 139 | +  # DUCKPLYR_FORCE stays set only until this test finishes. |
| 140 | +  withr::local_envvar(DUCKPLYR_FORCE = TRUE) |
| 141 | + |
| 142 | +  df <- duckdb_tibble(a = c(1, 1, 2, 2, 2), b = c(3, 3, NA, 3, 3)) |
| 143 | + |
| 144 | +  # n_distinct() is called inside mutate() rather than summarise(); the |
| 145 | +  # snapshot records the code in the braces together with the error. |
| 146 | +  expect_snapshot(error = TRUE, { |
| 147 | +    df %>% mutate(dummy = n_distinct(a)) |
| 148 | +  }) |
| 149 | +}) |
0 commit comments