Open
Description
The documentation for the parsnip encodings (i.e., those set by `parsnip::set_encoding()`) could/should be extended to give more guidance on how to use them.
When looking at what we do across different engine interfaces, I had two main questions:
- For engines with a formula interface, why/when do we use anything other than `predictor_indicators = "none"`?
- For engines with a matrix/data frame interface, why/when do we use `predictor_indicators = "traditional"` with `compute_intercept = FALSE`?
library(agua)
#> Loading required package: parsnip
library(baguette)
library(bonsai)
library(censored)
#> Loading required package: survival
library(discrim)
library(multilevelmod)
library(plsmod)
library(poissonreg)
library(rules)
library(dplyr)
library(purrr)
# Names of the model types that parsnip currently knows about; the extension
# packages attached above are expected to have added theirs on load.
mod_names <- get_from_env("models")

# One row per engine/mode/model with the interface used by its fit function.
model_interface <-
  mod_names %>%
  map(function(model_name) {
    get_from_env(paste0(model_name, "_fit")) %>%
      mutate(model = model_name)
  }) %>%
  list_rbind() %>%
  # Extract the interface by name rather than by position so the result does
  # not depend on the order in which an engine listed its fit elements.
  mutate(interface = map_chr(value, "interface")) %>%
  select(engine, mode, model, interface)

# Encoding settings for every model/engine/mode, annotated with the interface.
model_encodings <-
  mod_names %>%
  map(function(model_name) {
    get_from_env(paste0(model_name, "_encoding"))
  }) %>%
  list_rbind() %>%
  left_join(model_interface, by = join_by(model, engine, mode))
# Engines fitted through a formula interface: tally each combination of
# encoding settings.
formula_engines <- filter(model_encodings, interface == "formula")
count(
  formula_engines,
  predictor_indicators,
  compute_intercept,
  remove_intercept
)
#> # A tibble: 3 × 4
#> predictor_indicators compute_intercept remove_intercept n
#> <chr> <lgl> <lgl> <int>
#> 1 none FALSE FALSE 54
#> 2 traditional FALSE FALSE 1
#> 3 traditional TRUE TRUE 22
# Engines fitted through any non-formula (matrix / data.frame) interface:
# tally each combination of encoding settings.
non_formula_engines <- filter(model_encodings, interface != "formula")
count(
  non_formula_engines,
  predictor_indicators,
  compute_intercept,
  remove_intercept
)
#> # A tibble: 5 × 4
#> predictor_indicators compute_intercept remove_intercept n
#> <chr> <lgl> <lgl> <int>
#> 1 none FALSE FALSE 35
#> 2 one_hot FALSE TRUE 2
#> 3 traditional FALSE FALSE 2
#> 4 traditional FALSE TRUE 7
#> 5 traditional TRUE TRUE 14
Created on 2024-01-09 with reprex v2.0.2