Closed
Description
As the title says, when you predict from a rank-deficient fit in R 4.3.0 or later, the output of `predict.lm()` comes with an attribute `attr(*, "non-estim")`, which messes up the prediction coming from {parsnip}, giving it a column name of `.pred_res` instead of `.pred`. This in turn causes errors in most of the {tune} functions. Should be a fairly easy fix.
I found it when working on tidymodels/workshops#105.
R 4.2.0
data <- data.frame(
y = c(1,2,3,4),
x1 = c(1,1,2,3),
x2 = c(3,4,5,2),
x3 = c(4,2,6,0),
x4 = c(2,1,3,0)
)
data2 <- data.frame(
x1 = c(3,2,1,3),
x2 = c(3,2,1,4),
x3 = c(3,4,5,1),
x4 = c(0,0,2,3)
)
lm(y ~ ., data = data) |>
predict(data2)
#> Warning in predict.lm(lm(y ~ ., data = data), data2): prediction from a
#> rank-deficient fit may be misleading
#> 1 2 3 4
#> 3.8888889 1.7777778 -0.3333333 4.8888889
library(parsnip)
linear_reg() |>
fit(y ~ ., data = data) |>
predict(new_data = data2)
#> Warning in predict.lm(object = object$fit, newdata = new_data, type =
#> "response"): prediction from a rank-deficient fit may be misleading
#> # A tibble: 4 × 1
#> .pred
#> <dbl>
#> 1 3.89
#> 2 1.78
#> 3 -0.333
#> 4 4.89
R 4.3.0
data <- data.frame(
y = c(1,2,3,4),
x1 = c(1,1,2,3),
x2 = c(3,4,5,2),
x3 = c(4,2,6,0),
x4 = c(2,1,3,0)
)
data2 <- data.frame(
x1 = c(3,2,1,3),
x2 = c(3,2,1,4),
x3 = c(3,4,5,1),
x4 = c(0,0,2,3)
)
lm(y ~ ., data = data) |>
predict(data2)
#> Warning in predict.lm(lm(y ~ ., data = data), data2): prediction from
#> rank-deficient fit; attr(*, "non-estim") has doubtful cases
#> 1 2 3 4
#> 3.8888889 1.7777778 -0.3333333 4.8888889
#> attr(,"non-estim")
#> 1 2 3 4
#> 1 2 3 4
library(parsnip)
linear_reg() |>
fit(y ~ ., data = data) |>
predict(new_data = data2)
#> Warning in predict.lm(object = object$fit, newdata = new_data, type =
#> "response"): prediction from rank-deficient fit; attr(*, "non-estim") has
#> doubtful cases
#> # A tibble: 4 × 1
#> .pred_res
#> <dbl>
#> 1 3.89
#> 2 1.78
#> 3 -0.333
#> 4 4.89
Created on 2023-07-02 with reprex v2.0.2