tidymodels · DavisVaughan · Jul 1, 2020 · Jun 23, 2020 · Jun 23, 2020 · Jun 23, 2020
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,11 +22,12 @@ Imports:
     ellipsis (>= 0.2.0),
     generics,
     glue,
-    hardhat (>= 0.1.2),
-    parsnip (>= 0.0.4),
+    hardhat (>= 0.1.3.9000),
+    parsnip (>= 0.1.1.9000),
     rlang (>= 0.4.1)
 Remotes:
-    tidymodels/parsnip
+    tidymodels/parsnip#332,
+    tidymodels/hardhat
 Suggests: 
     covr,
     knitr,

diff --git a/NEWS.md b/NEWS.md
@@ -1,12 +1,12 @@
 # workflows (development version)
 
 * When using a formula preprocessor with `add_formula()`, workflows now uses
-  model-specific information from parsnip to decide whether or not to expand
-  factors into dummy variables. This should result in more intuitive behavior
-  when working with models that don't require dummy variables. For example,
-  if a parsnip `rand_forest()` model is used with a ranger engine, dummy
-  variables will not be created, because ranger can handle factors directly
-  (#51).
+  model-specific information from parsnip to decide whether to expand
+  factors via dummy encoding (`n - 1` levels) or one-hot encoding (`n` levels),
+  or to leave them unexpanded. This should result in more intuitive behavior
+  when working with models that don't require dummy variables. For example, if
+  a parsnip `rand_forest()` model is used with a ranger engine, dummy variables
+  will not be created, because ranger can handle factors directly (#51, #53).
 
 # workflows 0.1.1
 

diff --git a/R/fit.R b/R/fit.R
@@ -12,6 +12,8 @@
 #' In the future, there will also be _postprocessing_ steps that can be added
 #' after the model has been fit.
 #'
+#' @includeRmd man/rmd/indicators.Rmd details
+#'
 #' @param object A workflow
 #'
 #' @param data A data frame of predictors and outcomes to use when fitting the
@@ -187,18 +189,31 @@ finalize_blueprint_recipe <- function(workflow) {
 }
 
 finalize_blueprint_formula <- function(workflow) {
-  # Use the model indicators information to construct the blueprint
-  indicators <- pull_workflow_spec_indicators(workflow)
-  blueprint <- hardhat::default_formula_blueprint(indicators = indicators)
+  tbl_encodings <- pull_workflow_spec_encoding_tbl(workflow)
+
+  indicators <- tbl_encodings$predictor_indicators
+  intercept <- tbl_encodings$compute_intercept
+
+  if (!is_string(indicators)) {
+    abort("Internal error: `indicators` encoding from parsnip should be a string.")
+  }
+  if (!is_bool(intercept)) {
+    abort("Internal error: `intercept` encoding from parsnip should be a bool.")
+  }
+
+  # Use model-specific information to construct the blueprint
+  blueprint <- hardhat::default_formula_blueprint(
+    indicators = indicators,
+    intercept = intercept
+  )
 
   formula <- pull_workflow_preprocessor(workflow)
 
   update_formula(workflow, formula = formula, blueprint = blueprint)
 }
 
-pull_workflow_spec_indicators <- function(x) {
-  spec <- pull_workflow_spec(x)
-
+pull_workflow_spec_encoding_tbl <- function(workflow) {
+  spec <- pull_workflow_spec(workflow)
   spec_cls <- class(spec)[[1]]
 
   tbl_encodings <- parsnip::get_encoding(spec_cls)
@@ -207,11 +222,11 @@ pull_workflow_spec_indicators <- function(x) {
   indicator_mode <- tbl_encodings$mode == spec$mode
   indicator_spec <- indicator_engine & indicator_mode
 
-  indicators <- tbl_encodings$predictor_indicators[indicator_spec]
+  out <- tbl_encodings[indicator_spec, , drop = FALSE]
 
-  if (length(indicators) != 1L) {
+  if (nrow(out) != 1L) {
     abort("Internal error: Exactly 1 model/engine/mode combination must be located.")
   }
 
-  indicators
+  out
 }
diff --git a/R/pre-action-formula.R b/R/pre-action-formula.R
@@ -17,6 +17,8 @@
 #' To fit a workflow, one of `add_formula()` or `add_recipe()` _must_ be
 #' specified, but not both.
 #'
+#' @includeRmd man/rmd/add-formula.Rmd details
+#'
 #' @param x A workflow
 #'
 #' @param formula A formula specifying the terms of the model. It is advised to
@@ -26,9 +28,16 @@
 #' @param ... Not used.
 #'
 #' @param blueprint A hardhat blueprint used for fine tuning the preprocessing.
+#'
 #'   If `NULL`, [hardhat::default_formula_blueprint()] is used and is passed
-#'   an `indicators` argument that best aligns with the model present in
-#'   the workflow.
+#'   arguments that best align with the model present in the workflow.
+#'
+#'   Note that preprocessing done here is separate from preprocessing that
+#'   might be done by the underlying model. For example, if a blueprint with
+#'   `indicators = "none"` is specified, no dummy variables will be created by
+#'   hardhat, but if the underlying model requires a formula interface that
+#'   internally uses [stats::model.matrix()], factors will still be expanded to
+#'   dummy variables by the model.
 #'
 #' @return
 #' `x`, updated with either a new or removed formula preprocessor.

diff --git a/R/pre-action-recipe.R b/R/pre-action-recipe.R
@@ -24,8 +24,12 @@
 #' @param ... Not used.
 #'
 #' @param blueprint A hardhat blueprint used for fine tuning the preprocessing.
+#'
 #'   If `NULL`, [hardhat::default_recipe_blueprint()] is used.
 #'
+#'   Note that preprocessing done here is separate from preprocessing that
+#'   might be done automatically by the underlying model.
+#'
 #' @return
 #' `x`, updated with either a new or removed recipe preprocessor.
 #'

diff --git a/man/add_formula.Rd b/man/add_formula.Rd
diff --git a/man/add_model.Rd b/man/add_model.Rd
diff --git a/man/add_recipe.Rd b/man/add_recipe.Rd