Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Catching errors for mismatches in alt_counts_df splits #721

Merged
merged 18 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* Cleaned up spelling in documentation ([#685](https://github.com/insightsengineering/rtables/issues/685))
* Added `qtable_layout` and fixed `qtable` labeling via `row_labels` ([#698](https://github.com/insightsengineering/rtables/issues/698))
* Added vignette on exploratory analysis with `qtable`.
* Error catching and test coverage for cases where `alt_counts_df` presents different splits from `df`.

## rtables 0.6.2
* Fixed major regressions for `page_by` machinery caused by migration to `formatters` 0.5.1 pagination framework.
Expand Down
34 changes: 34 additions & 0 deletions R/tt_dotabulation.R
Original file line number Diff line number Diff line change
Expand Up @@ -753,12 +753,46 @@ setMethod(".make_split_kids", "Split",
spl_context = spl_context)[["datasplit"]],
error = function(e) e)

# Removing NA rows - to explore why this happens at all in a split
# This would be a fix but it is done in post-processing instead of pre-proc -> xxx
# x alt_dfpart <- lapply(alt_dfpart, function(data) {
# x data[!apply(is.na(data), 1, all), ]
# x })

# Error localization
if (is(alt_dfpart, "error")) {
stop("Following error encountered in splitting alt_counts_df: ",
alt_dfpart$message,
call. = FALSE)
}
# Error if split does not have the same values in the alt_df (and order)
# The following breaks if there are different levels (do_split returns empty list)
# or if there are different number of the same levels. Added handling of NAs
# in the values of the factor when is all only NAs
is_all_na <- all(is.na(alt_df[[spl_payload(spl)]]))

if (!all(names(dataspl) %in% names(alt_dfpart)) ||
length(alt_dfpart) != length(dataspl) ||
is_all_na) {
alt_df_spl_vals <- unique(alt_df[[spl_payload(spl)]])
end_part <- ""

if (!all(alt_df_spl_vals %in% levels(alt_df_spl_vals))) {
end_part <- paste0(" and following levels: ",
paste_vec(levels(alt_df_spl_vals)))
}

if (is_all_na) {
end_part <- ". Found only NAs in alt_counts_df split"
}

stop("alt_counts_df split variable(s) [", spl_payload(spl),
"] (in split ", as.character(class(spl)),
") does not have the same factor levels of df.\ndf has c(", '"',
paste(names(dataspl), collapse = '", "'), '"', ") levels while alt_counts_df has ",
ifelse(length(alt_df_spl_vals) > 0, paste_vec(alt_df_spl_vals), ""),
" unique values", end_part)
}
} else {
alt_dfpart <- setNames(rep(list(NULL), length(dataspl)), names(dataspl))
}
Expand Down
5 changes: 5 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,8 @@ spl_context_to_disp_path <- function(ctx) {
ret <- "root"
ret
}

# Utility function to paste vector of values in a nice way
paste_vec <- function(vec) {
paste0('c("', paste(vec, collapse = '", "'), '")')
}
55 changes: 55 additions & 0 deletions tests/testthat/test-tab_afun_cfun.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,61 @@ test_that("Error localization for missing split variable when done in alt_count_
expect_silent(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM))
})

test_that("Error localization for missmatch split variable when done in alt_count_df", {
afun_tmp <- function(x, .alt_df_row, .spl_context,...) {
# Important check that order is aligned even if source levels are not
check_val <- unique(.alt_df_row$ARMCD)
# This is something mysterious happening in splits for which if the values are all
# NAs in the split column, the dataspl has the nrow of the data in NA rows. xxx ToFix
check_val <- check_val[!is.na(check_val)]
stopifnot(as.character(check_val) == .spl_context$value[2])
mean(x)
}
lyt_row <- basic_table() %>% split_rows_by("ARMCD") %>% analyze("BMRKR1", afun = afun_tmp)

# Mismatch in the number of splits (NA is 0)
DM_tmp <- DM %>% mutate("ARMCD" = NA_character_)
expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp),
regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *"))

# Mismatch of levels
armcd_col <- factor(sample(c("arm A", "arm B", "arm C"), nrow(DM), replace = TRUE))
DM_tmp <- DM %>% mutate("ARMCD" = armcd_col)
expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp),
regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *"))

# Mix mismatch of levels
armcd_col <- factor(sample(c("arm A", "ARM B", "ARM C"), nrow(DM), replace = TRUE))
DM_tmp <- DM %>% mutate("ARMCD" = armcd_col)
expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp),
regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *"))

# Mismatch in the number of levels
armcd_col2 <- factor(sample(levels(ex_adsl$ARMCD)[c(1, 2)], nrow(DM), replace = TRUE))
DM_tmp <- DM %>% mutate("ARMCD" = armcd_col2)
expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp),
regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *"))

# Another order -> should work? yes, split is valid
levels(armcd_col) <- levels(ex_adsl$ARMCD)[c(1, 3, 2)]
DM_tmp <- DM %>% mutate("ARMCD" = armcd_col)
expect_silent(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp))

# Mix mismatch of levels but covering them all -> valid split
armcd_col <- factor(sample(c("arm A", levels(ex_adsl$ARMCD)), nrow(DM), replace = TRUE))
DM_tmp <- DM %>% mutate("ARMCD" = armcd_col)
expect_silent(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp))

# Values are all NA, but the levels are correct
DM_tmp$ARMCD <- factor(NA, levels = levels(ex_adsl$ARMCD))
expect_error(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp),
regexp = paste0("alt_counts_df split variable\\(s\\) \\[ARMCD\\] *"))

DM_tmp$ARMCD <- factor(NA, levels = levels(ex_adsl$ARMCD))
DM_tmp$ARMCD[seq_along(levels(ex_adsl$ARMCD))] <- levels(ex_adsl$ARMCD)
expect_silent(lyt_row %>% build_table(ex_adsl, alt_counts_df = DM_tmp))
})

context("Content functions (cfun)")

test_that(".alt_df_row appears in cfun but not in afun.", {
Expand Down
Loading