Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 86 additions & 25 deletions R/00tabletrees.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ setClassUnion("SubsetDef", c("expression", "logical", "integer", "numeric"))

setClassUnion("integerOrNULL", c("NULL", "integer"))
setClassUnion("characterOrNULL", c("NULL", "character"))
setClassUnion("characterOrList", c("list", "character"))

## should XXX [splits, s_values, sval_labels, subset(?)] be a data.frame?
setClass("TreePos", representation(
Expand All @@ -60,10 +61,29 @@ validity = function(object) {
}
)

setOldClass(c("FormatList", "list"))

FormatList <- function(..., .list = list(...)) {
if (!is.list(.list)) {
.list <- list(.list)
}
valid <- vapply(.list, is, class2 = "FormatSpec", TRUE)
if (!are(.list, "FormatSpec")) {
stop(
"Attempted to construct FormatList with elements that are not ",
"FormatSpec compatible. This should not happen, please contact ",
"the maintainers."
)
}

class(.list) <- c("FormatList", "list")
.list
}

setClassUnion("functionOrNULL", c("NULL", "function"))
setClassUnion("listOrNULL", c("NULL", "list"))
## TODO (?) make "list" more specific, e.g FormatList, or FunctionList?
setClassUnion("FormatSpec", c("NULL", "character", "function", "list"))
setClassUnion("FormatSpec", c("NULL", "character", "function", "list", "FormatList"))
setClassUnion("ExprOrNULL", c("NULL", "expression"))

setClass("ValueWrapper", representation(
Expand Down Expand Up @@ -133,7 +153,7 @@ setClass("Split",
name = "character",
split_label = "character",
split_format = "FormatSpec",
split_na_str = "character",
split_na_str = "characterOrList",
split_label_position = "character",
## NB this is the function which is applied to
## get the content rows for the CHILDREN of this
Expand Down Expand Up @@ -633,7 +653,9 @@ setClass("VAnalyzeSplit",
representation(
default_rowlabel = "character",
include_NAs = "logical",
var_label_position = "character"
var_label_position = "character",
row_formats_var = "characterOrNULL",
row_na_strs_var = "characterOrNULL"
)
)

Expand Down Expand Up @@ -672,7 +694,9 @@ AnalyzeVarSplit <- function(var,
indent_mod = 0L,
label_pos = "default",
cvar = "",
section_div = NA_character_) {
section_div = NA_character_,
formats_var = NULL,
na_strs_var = NULL) {
check_ok_label(split_label)
label_pos <- match.arg(label_pos, c("default", label_pos_values))
if (!any(nzchar(defrowlab))) {
Expand Down Expand Up @@ -701,7 +725,9 @@ AnalyzeVarSplit <- function(var,
page_title_prefix = NA_character_,
child_section_div = section_div,
child_show_colcounts = FALSE,
child_colcount_format = NA_character_
child_colcount_format = NA_character_,
row_formats_var = formats_var,
row_na_strs_var = na_strs_var
) ## no content_extra_args
}

Expand Down Expand Up @@ -823,7 +849,9 @@ AnalyzeMultiVars <- function(var,
child_labels = c("default", "topleft", "visible", "hidden"),
child_names = var,
cvar = "",
section_div = NA_character_) {
section_div = NA_character_,
formats_var = NULL,
na_strs_var = NULL) {
## NB we used to resolve to strict TRUE/FALSE for label visibillity
## in this function but that was too greedy for repeated
## analyze calls, so that now occurs in the tabulation machinery
Expand All @@ -842,26 +870,59 @@ AnalyzeMultiVars <- function(var,
## split_format = .repoutlst(split_format, nv)
inclNAs <- .repoutlst(inclNAs, nv)
section_div_if_multivar <- if (length(var) > 1) NA_character_ else section_div
pld <- mapply(AnalyzeVarSplit,
var = var,
split_name = child_names,
split_label = split_label,
afun = afun,
defrowlab = defrowlab,
cfun = cfun,
cformat = cformat,
## split_format = split_format,
inclNAs = inclNAs,
MoreArgs = list(
extra_args = extra_args,
indent_mod = indent_mod,
label_pos = show_kidlabs,
split_format = split_format,
split_na_str = split_na_str,
section_div = section_div_if_multivar
), ## rvis),
SIMPLIFY = FALSE

moreargs <- list(
extra_args = extra_args,
indent_mod = indent_mod,
label_pos = show_kidlabs,
section_div = section_div_if_multivar,
formats_var = formats_var,
na_strs_var = na_strs_var
)
mv_list_case <- is.list(split_format) &&
all(var %in% names(split_format)) &&
all(vapply(split_format, is, class2 = "FormatList", TRUE))
if (mv_list_case) { # diff format list for each var
stopifnot(all(var %in% names(split_na_str)))
## split_value does *not* go in more args, not constant across vars
pld <- mapply(
AnalyzeVarSplit,
var = var,
split_name = child_names,
split_label = split_label,
afun = afun,
defrowlab = defrowlab,
cfun = cfun,
cformat = cformat,
## in case they're in the wrong order for some insane reason
split_format = split_format[var],
split_na_str = split_na_str[var],
inclNAs = inclNAs,
MoreArgs = moreargs, ## rvis),
SIMPLIFY = FALSE
)
} else { # not diff lists for each var
## split format goes in more args because its constant across vars
pld <- mapply(
AnalyzeVarSplit,
var = var,
split_name = child_names,
split_label = split_label,
afun = afun,
defrowlab = defrowlab,
cfun = cfun,
cformat = cformat,
inclNAs = inclNAs,
MoreArgs = c(
moreargs,
list(
split_format = split_format,
split_na_str = split_na_str
)
), ## rvis),
SIMPLIFY = FALSE
)
}
} else {
## we're combining existing splits here
pld <- unlist(lapply(.payload, .uncompound))
Expand Down
10 changes: 9 additions & 1 deletion R/argument_conventions.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,14 @@ gen_args <- function(df, alt_counts_df, spl, pos, tt, tr, verbose, colwidths, ob
#' functions. See [formatters::list_valid_format_labels()] for a list of all available format strings.
#' @param format_na_str (`string`)\cr string which should be displayed when formatted if this cell's value(s)
#' are all `NA`.
#' @param formats_var (`string` or `NULL`)\cr `NULL` (the default) or the name of the list column containing named
#' lists of default formats to use. These will be applied with the same precedence as the `format` argument; i.e.,
#' they will not override formats (other than `"default"`) set within the afun.
#' Cannot be used simultaneously with `format`.
#' @param na_strs_var (`string` or `NULL`)\cr `NULL` (the default) or the name of the list column containing named
#' lists of default NA strings to use. These will be applied with the same precedence as the `format` argument; i.e.,
#' they will not override formats (other than `"default"`) set within the afun.
#' Cannot be used simultaneously with `format`. Cannot be used if `formats_var` is `NULL`.
#' @param indent_mod (`numeric`)\cr modifier for the default indent position for the structure created by this
#' function (subtable, content table, or row) *and all of that structure's children*. Defaults to 0, which
#' corresponds to the unmodified default behavior.
Expand Down Expand Up @@ -157,7 +165,7 @@ lyt_args <- function(lyt, var, vars, label, labels_var, varlabels, varnames, spl
var_labels, cvar,
table_names, topleft, align, page_by, page_prefix,
format_na_str, section_div, na_str, show_colcounts,
colcount_format, parent_name) {
colcount_format, parent_name, formats_var, na_strs_var) {
NULL
}

Expand Down
113 changes: 106 additions & 7 deletions R/colby_constructors.R
Original file line number Diff line number Diff line change
Expand Up @@ -1067,12 +1067,81 @@ NULL
#' divider will be overridden by a split-level section divider when
#' both apply to the same position in the rendered output.
#'
#' @inherit split_cols_by return
#'
#' @details
#' When non-`NULL`, `format` is used to specify formats for all generated rows, and can be a character vector, a
#' function, or a list of functions. It will be repped out to the number of rows once this is calculated during the
#' tabulation process, but will be overridden by formats specified within `rcell` calls in `afun`.
#' @details When `length(vars) > 1` and when two calls to `analyze`
#' are done in sequence (the second with the default `nested =
#' TRUE`), the analyses will be combined into a multi-variable
#' analysis that will be reflected in the row structure of the
#' resulting table. In these cases, the default is to show the
#' label describing the variable analyzed for each of the
#' resulting subtables, while that is hidden by default in
#' one-variable cases.
#'
#' # Specifying Default Formatting Behavior
#'
#' *Default* formatting behavior for rows generated by `afun` can be
#' specified by one of `format` or `formats_var`. In both cases, these
#' default formatting instructions *will not* supersede formatting
#' specified from within `afun` at either the `rcell` or `in_rows`
#' call levels; They will only apply to rows/cells whose formatting as
#' returned by `afun` is either `NULL` or `"default"`. When
#' non-`NULL`, `format` is used to specify formats for all generated
#' rows, and can be a character vector, a function, or a list of
#' functions. It will be repped out to the number of rows once this is
#' calculated during the tabulation process, but will be overridden by
#' formats specified within `rcell` calls in `afun`.
#'
#' `format` can accept a format label string (see
#' [formatters::list_valid_format_labels()]), a formatting function, an
#' unnamed list, or a named list.
#'
#' When `format` is an unnamed list - or a named list where not all
#' values of `vars` appear in the names - its elements will be repped
#' out to the number of rows generated by `afun` (separately) within
#' each row facet `afun` is applied within. **This includes recycling
#' behavior, even in the case where the number of rows is not cleanly
#' divisible by the number of specified formats**. This behavior is
#' retained largely for legacy reasons and switching to the new
#' named-list behavior is advised where applicable.
#'
#' When `format` is a named list whose names contain all values in
#' `vars`, the elements of `format` are taken to be specific to the
#' analysis of the corresponding variable; this allows us to specify a
#' multi-variable analysis where e.g., the different variables are
#' analyzed by the same `afun` but have different levels of
#' measurement precision (and thus different formatting needs). In
#' this case the var-specific formatting can be a single format (label
#' string or function) or can be a named list whose names will be
#' matched up to those of the rows generated by applying `afun` in
#' each row facet. Matching of formats to rows is performed the same
#' as in the `formats_var` case and is described below.
#'
#' When `formats_var` is non-`NULL`, it specifies the name of a list
#' column containing formatting instructions for one or more rows
#' `afun` will generate when applied within a row facet. This can be
#' used when the analysis results for a single variable (e.g., `value`
#' or `AVAL` in long-form data) should be formatted differently within
#' different row facets (e.g., when faceting on `statistic` or
#' `PARAMCD`). The value of `df[[formats_var]]` is assumed without
#' verification to be constant within each row facet `afun` is applied
#' within, and the first (list) value of the column within the row
#' facet data will be used.
#'
#' In the `formats_var` case as well as the case of `format` being a
#' named list containing the values of `vars`, after rows are created
#' during tabulation, the default formats are matched and applied to
#' them as follows:
#'
#' 1. When the generated row's name (as given by `obj_name`) matches
#' a name in the list, the corresponding default format is applied,
#' 2. for those without exact matches, the default format whose name
#' provides *the best partial match* to each row name is applied,
#' 3. For those without default format names that partially match
#' the row name, no default format is applied.
#'
#' Note carefully that in (2), it is the names of the list of formats
#' that are partially matching the row names not the other way around.
#'
#' # The Analysis Function
#'
#' The analysis function (`afun`) should take as its first parameter either `x` or `df`. Whichever of these the
#' function accepts will change the behavior when tabulation is performed as follows:
Expand All @@ -1086,6 +1155,8 @@ NULL
#' other parameters which, *if and only if* present in the formals, will be passed to the function by the tabulation
#' machinery. These are listed and described in [additional_fun_params].
#'
#' @inherit split_cols_by return
#'
#' @note None of the arguments described in [additional_fun_params] can be overridden via `extra_args` or when calling
#' [make_afun()]. `.N_col` and `.N_total` can be overridden via the `col_counts` argument to [build_table()].
#' Alternative values for the others must be calculated within `afun` based on a combination of extra arguments and
Expand Down Expand Up @@ -1124,7 +1195,9 @@ analyze <- function(lyt,
table_names = vars,
parent_name = NULL,
format = NULL,
formats_var = NULL,
na_str = NA_character_,
na_strs_var = NULL,
nested = TRUE,
## can't name this na_rm symbol conflict with possible afuns!!
inclNAs = FALSE,
Expand All @@ -1134,6 +1207,19 @@ analyze <- function(lyt,
section_div = NA_character_) {
show_labels <- match.arg(show_labels)
subafun <- substitute(afun)
if (!is.null(format) && !is.null(formats_var)) {
stop(
"Cannot use 'format' and 'formats_var' arguments at ",
"the same time. Please choose one method for specifying ",
"default formatting."
)
} else if (is.null(formats_var) && !is.null(na_strs_var)) {
stop(
"Cannot use 'na_strs_var' (got ",
na_strs_var,
") without using 'formats_var'."
)
}
# R treats a single NA value as a logical atomic. The below
# maps all the NAs in `var_labels` to NA_character_ required by `Split`
# and avoids the error when `var_labels` is just c(NA).
Expand All @@ -1159,6 +1245,17 @@ analyze <- function(lyt,
defrowlab <- var_labels
}

## hook up the new hotness
var_format_lists <- length(vars) > 1 &&
is.list(format) &&
all(vars %in% names(format))

if (var_format_lists) {
format <- lapply(format, function(x) FormatList(.list = x))
if (is.character(na_str)) {
na_str <- lapply(format, function(x) na_str)
}
}
spl <- AnalyzeMultiVars(vars, var_labels,
afun = afun,
split_format = format,
Expand All @@ -1170,7 +1267,9 @@ analyze <- function(lyt,
child_names = table_names,
child_labels = show_labels,
section_div = section_div,
split_name = parent_name
split_name = parent_name,
formats_var = formats_var,
na_strs_var = na_strs_var
)

if (nested && (is(last_rowsplit(lyt), "VAnalyzeSplit") || is(last_rowsplit(lyt), "AnalyzeMultiVars"))) {
Expand Down
Loading
Loading