Closed
Description
Following on from tidymodels/censored#272, and moving beyond glmnet models, I found a bug in workflows where we end up with one too many intercepts...
Workflows adds an intercept in the `pre` stage, and then another one gets added by the expansion of the `.` in the model formula.
library(parsnip)
library(workflows)
wflow_fit <-
workflow() %>%
add_formula(mpg ~ cyl + disp + hp) %>%
add_model(linear_reg(), formula = mpg ~ cyl + disp + hp) %>%
fit(data = mtcars)
# this is what we'd expect
coef(wflow_fit$fit$fit$fit)
#> (Intercept) cyl disp hp
#> 34.18491917 -1.22741994 -0.01883809 -0.01467933
wflow_fit <-
workflow() %>%
add_formula(mpg ~ cyl + disp + hp) %>%
add_model(linear_reg(), formula = mpg ~ .) %>%
fit(data = mtcars)
# this has one too many intercepts
coef(wflow_fit$fit$fit$fit)
#> (Intercept) `(Intercept)` cyl disp hp
#> 34.18491917 NA -1.22741994 -0.01883809 -0.01467933
Created on 2023-11-24 with reprex v2.0.2
So far, I think this only affects the specific combination of a preprocessing formula (`add_formula()`) and a dot (`.`) in the model formula.
Reprex showing that the dot expansion in the preprocessing formula is not the cause
library(parsnip)
library(workflows)
wflow_fit <-
workflow() %>%
add_formula(mpg ~ .) %>%
add_model(linear_reg(), formula = mpg ~ cyl + disp + hp) %>%
fit(data = mtcars)
coef(wflow_fit$fit$fit$fit)
#> (Intercept) cyl disp hp
#> 34.18491917 -1.22741994 -0.01883809 -0.01467933
Created on 2023-11-24 with reprex v2.0.2
Reprex showing that the problem comes from the interaction with the preprocessing formula (recipe and variables preprocessors give the expected coefficients)
library(parsnip)
library(workflows)
library(recipes)
# recipe as preprocessor --------------------------------------------------
wflow_fit <-
workflow() %>%
add_recipe(recipe(mpg ~ cyl + disp + hp, mtcars)) %>%
add_model(linear_reg(), formula = mpg ~ cyl + disp + hp) %>%
fit(data = mtcars)
coef(wflow_fit$fit$fit$fit)
#> (Intercept) cyl disp hp
#> 34.18491917 -1.22741994 -0.01883809 -0.01467933
wflow_fit <-
workflow() %>%
add_recipe(recipe(mpg ~ cyl + disp + hp, mtcars)) %>%
add_model(linear_reg(), formula = mpg ~ .) %>%
fit(data = mtcars)
coef(wflow_fit$fit$fit$fit)
#> (Intercept) cyl disp hp
#> 34.18491917 -1.22741994 -0.01883809 -0.01467933
# variables as preprocessor -----------------------------------------------
wflow_fit <-
workflow() %>%
add_variables(outcomes = mpg, predictors = c(cyl, disp, hp)) %>%
add_model(linear_reg(), formula = mpg ~ cyl + disp + hp) %>%
fit(data = mtcars)
coef(wflow_fit$fit$fit$fit)
#> (Intercept) cyl disp hp
#> 34.18491917 -1.22741994 -0.01883809 -0.01467933
wflow_fit <-
workflow() %>%
add_variables(outcomes = mpg, predictors = c(cyl, disp, hp)) %>%
add_model(linear_reg(), formula = mpg ~ .) %>%
fit(data = mtcars)
coef(wflow_fit$fit$fit$fit)
#> (Intercept) cyl disp hp
#> 34.18491917 -1.22741994 -0.01883809 -0.01467933
Created on 2023-11-24 with reprex v2.0.2