Skip to content

Commit a3e805d

Browse files
committed
refactor+doc: key_colnames and vignettes
* key_colnames order change * replace kill_time_value with exclude arg in key_colnames * move duplicate time_values check in epi_slide
1 parent dd19428 commit a3e805d

34 files changed

+755
-773
lines changed

.Rbuildignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,5 @@
1616
^.lintr$
1717
^DEVELOPMENT.md$
1818
man-roxygen
19+
^.venv$
20+
^sandbox.R$

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ docs
1313
renv/
1414
renv.lock
1515
.Rprofile
16+
sandbox.R

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ Imports:
5050
tidyselect (>= 1.2.0),
5151
tsibble,
5252
utils,
53-
vctrs
53+
vctrs,
54+
waldo
5455
Suggests:
5556
covidcast,
5657
devtools,

NAMESPACE

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,8 @@ export("%>%")
5555
export(archive_cases_dv_subset)
5656
export(arrange)
5757
export(arrange_canonical)
58-
export(as_diagonal_slide_computation)
5958
export(as_epi_archive)
6059
export(as_epi_df)
61-
export(as_time_slide_computation)
6260
export(as_tsibble)
6361
export(autoplot)
6462
export(clone)

R/autoplot.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ autoplot.epi_df <- function(
5555

5656
key_cols <- key_colnames(object)
5757
non_key_cols <- setdiff(names(object), key_cols)
58-
geo_and_other_keys <- kill_time_value(key_cols)
58+
geo_and_other_keys <- key_colnames(object, exclude = "time_value")
5959

6060
# --- check for numeric variables
6161
allowed <- purrr::map_lgl(object[non_key_cols], is.numeric)

R/epi_df.R

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -184,18 +184,14 @@ new_epi_df <- function(x = tibble::tibble(geo_value = character(), time_value =
184184
metadata$other_keys <- other_keys
185185

186186
# Reorder columns (geo_value, time_value, ...)
187-
if (sum(dim(x)) != 0) {
188-
cols_to_put_first <- c("geo_value", "time_value", other_keys)
189-
x <- x[, c(
190-
cols_to_put_first,
191-
# All other columns
192-
names(x)[!(names(x) %in% cols_to_put_first)]
193-
)]
187+
if (nrow(x) > 0) {
188+
x <- x %>% relocate(all_of(c("geo_value", other_keys, "time_value")), .before = 1)
194189
}
195190

196191
# Apply epi_df class, attach metadata, and return
197192
class(x) <- c("epi_df", class(x))
198193
attributes(x)$metadata <- metadata
194+
199195
return(x)
200196
}
201197

@@ -281,6 +277,7 @@ as_epi_df.tbl_df <- function(
281277
if (".time_value_counts" %in% other_keys) {
282278
cli_abort("as_epi_df: `other_keys` can't include \".time_value_counts\"")
283279
}
280+
284281
duplicated_time_values <- x %>%
285282
group_by(across(all_of(c("geo_value", "time_value", other_keys)))) %>%
286283
filter(dplyr::n() > 1) %>%

R/grouped_epi_archive.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -397,8 +397,8 @@ epix_slide.grouped_epi_archive <- function(
397397
)),
398398
capture.output(print(waldo::compare(
399399
res[[comp_nms[[comp_i]]]], comp_value[[comp_i]],
400-
x_arg = rlang::expr_deparse(expr(`$`(label, !!sym(comp_nms[[comp_i]])))),
401-
y_arg = rlang::expr_deparse(expr(`$`(comp_value, !!sym(comp_nms[[comp_i]]))))
400+
x_arg = rlang::expr_deparse(dplyr::expr(`$`(label, !!sym(comp_nms[[comp_i]])))), # nolint: object_usage_linter
401+
y_arg = rlang::expr_deparse(dplyr::expr(`$`(comp_value, !!sym(comp_nms[[comp_i]]))))
402402
))),
403403
cli::format_message(c(
404404
"You likely want to rename or remove this column in your output, or debug why it has a different value."

R/key_colnames.R

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,39 +2,46 @@
22
#'
33
#' @param x a data.frame, tibble, or epi_df
44
#' @param ... additional arguments passed on to methods
5-
#'
6-
#' @return If an `epi_df`, this returns all "keys". Otherwise `NULL`
5+
#' @param other_keys an optional character vector of other keys to include
6+
#' @param exclude an optional character vector of keys to exclude
7+
#' @return If an `epi_df`, this returns all "keys". Otherwise `NULL`.
78
#' @keywords internal
89
#' @export
910
key_colnames <- function(x, ...) {
1011
UseMethod("key_colnames")
1112
}
1213

14+
#' @rdname key_colnames
15+
#' @method key_colnames default
1316
#' @export
1417
key_colnames.default <- function(x, ...) {
1518
character(0L)
1619
}
1720

21+
#' @rdname key_colnames
22+
#' @method key_colnames data.frame
1823
#' @export
19-
key_colnames.data.frame <- function(x, other_keys = character(0L), ...) {
24+
key_colnames.data.frame <- function(x, other_keys = character(0L), exclude = character(0L), ...) {
2025
assert_character(other_keys)
21-
nm <- c("geo_value", "time_value", other_keys)
26+
assert_character(exclude)
27+
nm <- setdiff(c("geo_value", other_keys, "time_value"), exclude)
2228
intersect(nm, colnames(x))
2329
}
2430

31+
#' @rdname key_colnames
32+
#' @method key_colnames epi_df
2533
#' @export
26-
key_colnames.epi_df <- function(x, ...) {
34+
key_colnames.epi_df <- function(x, exclude = character(0L), ...) {
35+
assert_character(exclude)
2736
other_keys <- attr(x, "metadata")$other_keys
28-
c("geo_value", "time_value", other_keys)
37+
setdiff(c("geo_value", other_keys, "time_value"), exclude)
2938
}
3039

40+
#' @rdname key_colnames
41+
#' @method key_colnames epi_archive
3142
#' @export
32-
key_colnames.epi_archive <- function(x, ...) {
43+
key_colnames.epi_archive <- function(x, exclude = character(0L), ...) {
44+
assert_character(exclude)
3345
other_keys <- attr(x, "metadata")$other_keys
34-
c("geo_value", "time_value", other_keys)
35-
}
36-
37-
kill_time_value <- function(v) {
38-
assert_character(v)
39-
v[v != "time_value"]
46+
setdiff(c("geo_value", other_keys, "time_value"), exclude)
4047
}

R/methods-epi_archive.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -731,7 +731,7 @@ epix_detailed_restricted_mutate <- function(.data, ...) {
731731
#' library(dplyr)
732732
#'
733733
#' # Reference time points for which we want to compute slide values:
734-
#' versions <- seq(as.Date("2020-06-01"),
734+
#' versions <- seq(as.Date("2020-06-02"),
735735
#' as.Date("2020-06-15"),
736736
#' by = "1 day"
737737
#' )
@@ -780,7 +780,7 @@ epix_detailed_restricted_mutate <- function(.data, ...) {
780780
#' .versions = versions
781781
#' ) %>%
782782
#' ungroup() %>%
783-
#' arrange(geo_value, time_value)
783+
#' arrange(geo_value, version)
784784
#'
785785
#' # --- Advanced: ---
786786
#'

R/methods-epi_df.R

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,13 @@ as_tibble.epi_df <- function(x, ...) {
4141
#' @export
4242
as_tsibble.epi_df <- function(x, key, ...) {
4343
if (missing(key)) key <- c("geo_value", attributes(x)$metadata$other_keys)
44-
return(as_tsibble(tibble::as_tibble(x),
45-
key = tidyselect::all_of(key), index = "time_value",
46-
...
47-
))
44+
return(
45+
as_tsibble(
46+
tibble::as_tibble(x),
47+
key = tidyselect::all_of(key), index = "time_value",
48+
...
49+
)
50+
)
4851
}
4952

5053
#' Base S3 methods for an `epi_df` object
@@ -150,10 +153,10 @@ dplyr_reconstruct.epi_df <- function(data, template) {
150153
# keep any grouping that has been applied:
151154
res <- NextMethod()
152155

153-
cn <- names(res)
156+
col_names <- names(res)
154157

155158
# Duplicate columns, cli_abort
156-
dup_col_names <- cn[duplicated(cn)]
159+
dup_col_names <- col_names[duplicated(col_names)]
157160
if (length(dup_col_names) != 0) {
158161
cli_abort(c(
159162
"Duplicate column names are not allowed",
@@ -163,7 +166,7 @@ dplyr_reconstruct.epi_df <- function(data, template) {
163166
))
164167
}
165168

166-
not_epi_df <- !("time_value" %in% cn) || !("geo_value" %in% cn)
169+
not_epi_df <- !("time_value" %in% col_names) || !("geo_value" %in% col_names)
167170

168171
if (not_epi_df) {
169172
# If we're calling on an `epi_df` from one of our own functions, we need to
@@ -182,7 +185,7 @@ dplyr_reconstruct.epi_df <- function(data, template) {
182185

183186
# Amend additional metadata if some other_keys cols are dropped in the subset
184187
old_other_keys <- attr(template, "metadata")$other_keys
185-
attr(res, "metadata")$other_keys <- old_other_keys[old_other_keys %in% cn]
188+
attr(res, "metadata")$other_keys <- old_other_keys[old_other_keys %in% col_names]
186189

187190
res
188191
}
@@ -424,9 +427,13 @@ arrange_col_canonical.epi_df <- function(x, ...) {
424427
x %>% dplyr::relocate(dplyr::all_of(cols), .before = 1)
425428
}
426429

430+
#' Group an `epi_df` object by default keys
431+
#' @param x an `epi_df`
432+
#' @param exclude character vector of column names to exclude from grouping
433+
#' @return a grouped `epi_df`
427434
#' @export
428-
group_epi_df <- function(x) {
429-
cols <- kill_time_value(key_colnames(x))
435+
group_epi_df <- function(x, exclude = character()) {
436+
cols <- key_colnames(x, exclude = exclude)
430437
x %>% group_by(across(all_of(cols)))
431438
}
432439

@@ -437,7 +444,7 @@ group_epi_df <- function(x) {
437444
#' the resulting `epi_df` will have `geo_value` set to `"total"`.
438445
#'
439446
#' @param .x an `epi_df`
440-
#' @param value_col character vector of the columns to aggregate
447+
#' @param sum_cols character vector of the columns to aggregate
441448
#' @param group_cols character vector of column names to group by. "time_value" is
442449
#' included by default.
443450
#' @return an `epi_df` object

0 commit comments

Comments
 (0)