Skip to content

Drop non-constant aesthetics more thoroughly #4917

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

47 changes: 39 additions & 8 deletions R/stat-.r
Original file line number Diff line number Diff line change
Expand Up @@ -120,30 +120,61 @@ Stat <- ggproto("Stat",
self$compute_group(data = group, scales = scales, ...)
})

# Record columns that are not constant within groups. We will drop them later.
non_constant_columns <- character(0)

stats <- mapply(function(new, old) {
# In this function,
#
# - `new` is the computed result. All the variables will be picked.
# - `old` is the original data. There are 3 types of variables:
# 1) If the variable is already included in `new`, it's ignored
# because the values of `new` will be used.
# 2) If the variable is not included in `new` and the value is
# constant within the group, it will be picked.
# 3) If the variable is not included in `new` and the value is not
# constant within the group, it will be dropped. We need to record
# the dropped columns to drop it consistently later.

if (empty(new)) return(data_frame0())
unique <- uniquecols(old)
missing <- !(names(unique) %in% names(new))

# First, filter out the columns already included `new` (type 1).
old <- old[, !(names(old) %in% names(new)), drop = FALSE]

# Then, check whether the rest of the columns have constant values (type 2)
# or not (type 3).
non_constant <- vapply(old, function(x) length(unique0(x)) > 1, logical(1L))

# Record the non-constant columns.
non_constant_columns <<- c(non_constant_columns, names(old)[non_constant])

vec_cbind(
new,
unique[rep(1, nrow(new)), missing,drop = FALSE]
# Note that, while the non-constant columns should be dropped, we don't
# do this here because it can be filled by vec_rbind() later if either
# one of the group has a constant value (see #4394 for the details).
old[rep(1, nrow(new)), , drop = FALSE]
)
}, stats, groups, SIMPLIFY = FALSE)

data_new <- vec_rbind(!!!stats)
non_constant_columns <- unique0(non_constant_columns)

# The above code will drop columns that are not constant within groups and not
# We are going to drop columns that are not constant within groups and not
# carried over/recreated by the stat. This can produce unexpected results,
# and hence we warn about it.
dropped <- base::setdiff(names(data), base::union(self$dropped_aes, names(data_new)))
# and hence we warn about it (variables in dropped_aes are expected so
# ignored here).
dropped <- non_constant_columns[!non_constant_columns %in% self$dropped_aes]
if (length(dropped) > 0) {
cli::cli_warn(c(
"The following aesthetics were dropped during statistical transformation: {.field {glue_collapse(dropped, sep = ', ')}}",
"i" = "This can happen when ggplot fails to infer the correct grouping structure in the data.",
"i" = "Did you forget to specify a {.code group} aesthetic or to convert a numerical variable into a factor?"
))
}
data_new

# Finally, combine the results and drop columns that are not constant.
data_new <- vec_rbind(!!!stats)
data_new[, !names(data_new) %in% non_constant_columns, drop = FALSE]
},

compute_group = function(self, data, scales) {
Expand Down
51 changes: 48 additions & 3 deletions tests/testthat/test-stats.r
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,59 @@ test_that("error message is thrown when aesthetics are missing", {
})

test_that("erroneously dropped aesthetics are found and issue a warning", {
df <- data_frame(

# case 1) dropped completely

df1 <- data_frame(
x = c( # arbitrary random numbers
0.42986445, 1.11153170, -1.22318013, 0.90982003,
0.46454276, -0.42300004, -1.76139834, -0.75060412,
0.01635474, -0.63202159
),
g = rep(1:2, each = 5)
)
p <- ggplot(df, aes(x, fill = g)) + geom_density()
expect_warning(ggplot_build(p), "aesthetics were dropped")
p1 <- ggplot(df1, aes(x, fill = g)) + geom_density()
expect_warning(ggplot_build(p1), "aesthetics were dropped")

# case 2-1) dropped partially

df2 <- data_frame(
id = c("a", "a", "b", "b", "c"),
colour = c( 0, 1, 10, 10, 20), # a should be dropped
fill = c( 0, 0, 10, 11, 20) # b should be dropped
)

p2 <- ggplot(df2, aes(id, colour = colour, fill = fill)) + geom_bar()
expect_warning(
b2 <- ggplot_build(p2),
"The following aesthetics were dropped during statistical transformation: .*colour.*, .*fill.*"
)

# colour is dropped because group a's colour is not constant (GeomBar$default_aes$colour is NA)
expect_true(all(is.na(b2$data[[1]]$colour)))
# fill is dropped because group b's fill is not constant
expect_true(all(b2$data[[1]]$fill == GeomBar$default_aes$fill))

# case 2-1) dropped partially with NA

df3 <- data_frame(
id = c("a", "a", "b", "b", "c"),
colour = c( 0, NA, 10, 10, 20), # a should be dropped
fill = c( NA, NA, 10, 10, 20) # a should not be dropped
)

p3 <- ggplot(df3, aes(id, colour = colour, fill = fill)) + geom_bar() +
scale_fill_continuous(na.value = "#123")
expect_warning(
b3 <- ggplot_build(p3),
"The following aesthetics were dropped during statistical transformation: .*colour.*"
)

# colour is dropped because group a's colour is not constant (GeomBar$default_aes$colour is NA)
expect_true(all(is.na(b3$data[[1]]$colour)))
# fill is NOT dropped. Group a's fill is na.value, but others are mapped.
expect_equal(
b3$data[[1]]$fill == "#123",
c(TRUE, FALSE, FALSE)
)
})