tidymodels · topepo · Jun 11, 2025 · Jun 6, 2025 · Jun 6, 2025 · Jun 9, 2025
diff --git a/R/loop_over_all_stages.R b/R/loop_over_all_stages.R
@@ -12,6 +12,7 @@ loop_over_all_stages <- function(resamples, grid, static) {
     type = character(),
     note = character()
   )
+  extracts <- NULL
 
   sched <- schedule_grid(grid, static$wflow)
 
@@ -197,7 +198,24 @@ loop_over_all_stages <- function(resamples, grid, static) {
           pred_reserve <- dplyr::bind_rows(pred_reserve, final_pred)
 
           # --------------------------------------------------------------------
-          # Placeholder for extraction
+          # Extractions
+          if (!is.null(static$control$extract)) {
+            elt_extract <- .catch_and_log_melodie(
+              extract_details(current_wflow, static$control$extract)
+            )
+
+            if (has_log_notes(elt_extract)) {
+              location <- glue::glue(
+                "extraction"
+              )
+              notes <- append_log_notes(notes, elt_extract, location)
+              if (is_failure_melodie(elt_extract)) {
+                next
+              }
+            }
+            elt_extract <- remove_log_notes(elt_extract)
+            extracts <- c(extracts, list(elt_extract))
+          }
         } # post loop
       } # predict loop
     } # model loop
@@ -222,14 +240,25 @@ loop_over_all_stages <- function(resamples, grid, static) {
       dplyr::arrange(.config)
   }
 
+  if (!is.null(extracts)) {
+    extracts <- config_tbl |>
+      dplyr::mutate(.extracts = extracts) |>
+      dplyr::relocate(.config, .after = .extracts)
+  }
+
   # ----------------------------------------------------------------------------
   # Return the results
 
   return_list <- tibble::tibble(
     .metrics = list(all_metrics),
     .notes = list(notes)
-  ) |>
-    vctrs::vec_cbind(split_labs)
+  )
+
+  if (!is.null(extracts)) {
+    return_list <- dplyr::mutate(return_list, .extracts = list(extracts))
+  }
+
+  return_list <- vctrs::vec_cbind(return_list, split_labs)
 
   if (static$control$save_pred) {
     return_list$.predictions <- list(

diff --git a/tests/testthat/_snaps/loop-over-all-stages-logging.md b/tests/testthat/_snaps/loop-over-all-stages-logging.md
@@ -46,6 +46,12 @@
         allow_par = FALSE))
     Message
 
+# captures extract errors
+
+    Code
+      res_fit <- melodie_grid(wf_spec, folds, grid = 2, control = control_grid(
+        allow_par = FALSE, extract = extract_error))
+
 # captures kknn R errors
 
     Code

diff --git a/tests/testthat/test-loop-over-all-stages-logging.R b/tests/testthat/test-loop-over-all-stages-logging.R
@@ -292,6 +292,42 @@ test_that("doesn't capturing message in notes", {
   rm(list = "bake.step_logging_helper", envir = .GlobalEnv)
 })
 
+test_that("captures extract errors", {
+  skip_if_not_installed("modeldata")
+
+  set.seed(1234)
+  ames <- modeldata::ames[, c(72, 40:45)]
+  folds <- rsample::vfold_cv(ames, 2)
+
+  rec_spec <- recipe(Sale_Price ~ ., ames)
+  mod_spec <- parsnip::nearest_neighbor(
+    "regression",
+    "kknn",
+    neighbors = tune()
+  )
+
+  wf_spec <- workflow(rec_spec, mod_spec)
+  # An extractor that always errors, so every extraction attempt fails.
+  extract_error <- function(x) {
+    stop("extract error")
+  }
+  # The failures must not abort tuning; the snapshot pins the console output.
+  expect_snapshot(
+    res_fit <- melodie_grid(
+      wf_spec,
+      folds,
+      grid = 2,
+      control = control_grid(allow_par = FALSE, extract = extract_error)
+    ),
+    transform = catalog_lines
+  )
+  # NOTE(review): 4 presumably = 2 folds x 2 grid candidates, one note each.
+  expect_identical(
+    nrow(collect_notes(res_fit)),
+    4L
+  )
+})
+
 test_that("captures kknn R errors", {
   skip_if_not_installed("modeldata")
 

diff --git a/tests/testthat/test-loop-over-all-stages-post-estimation-and-tuning-extract.R b/tests/testthat/test-loop-over-all-stages-post-estimation-and-tuning-extract.R
@@ -0,0 +1,256 @@
+# These tests reference results generated by tune 1.3.0. The code that generates
+# them, along with the results, is found in the `inst` directory.
+# All tests are about the `extract` argument.
+
+test_that("verifying loop_over_all_stages, no submodels, post estimation with tuning", {
+  # With the identity function as `control$extract`, the result should gain an
+  # `.extracts` tibble holding one workflow per grid candidate.
+  skip_if_not_installed("modeldata")
+  skip_if_not_installed("kknn")
+  skip_if_not_installed("probably")
+  skip_if_not_installed("mgcv")
+
+  load(system.file(
+    "regression_tests",
+    "simple_example.RData",
+    package = "tune"
+  ))
+
+  # ------------------------------------------------------------------------------
+
+  set.seed(1)
+  dat <- modeldata::sim_regression(1000)
+  rs <- vfold_cv(dat)
+
+  rs_split <- rs$splits[[1]]
+  rs_args <- rsample::.get_split_args(rs)
+
+  rs_iter <- tune:::vec_list_rowwise(rs) |>
+    purrr::pluck(1)
+
+  # ------------------------------------------------------------------------------
+
+  mod <- nearest_neighbor(neighbors = 11, weight_func = tune()) |>
+    set_mode("regression")
+  wflow <- workflow(outcome ~ ., mod, reg_cal_max)
+  max_param <-
+    wflow |>
+    extract_parameter_set_dials() |>
+    update(upper_limit = upper_limit(c(0, 1)))
+
+  grd <- max_param |> grid_regular(levels = c(3, 2))
+
+  ext_1 <- function(x) x
+
+  static_1 <- tune:::make_static(
+    wflow,
+    param_info = max_param,
+    metrics = metric_set(rmse, rsq),
+    eval_time = NULL,
+    split_args = rs_args,
+    control = control_grid(save_pred = TRUE, extract = ext_1)
+  )
+
+  data_1 <- tune:::get_data_subsets(wflow, rs_split, rs_args)
+  static_1 <- tune:::update_static(static_1, data_1)
+  static_1$y_name <- "outcome"
+
+  simple_res <- tune:::loop_over_all_stages(rs_iter, grd, static_1)
+  expect_false(is.null(simple_res$.metrics[[1]]))
+  expect_named(
+    simple_res,
+    c(".metrics", ".notes", ".extracts", "id", ".predictions")
+  )
+  expect_identical(nrow(simple_res), 1L)
+  expect_equal(
+    nrow(simple_res$.predictions[[1]]),
+    nrow(data_1$pred$data) * nrow(grd)
+  )
+
+  # The extracts tibble has the grid columns plus `.extracts` and `.config`.
+  extracted <- simple_res$.extracts[[1]]
+  expect_identical(nrow(extracted), nrow(grd))
+  expect_named(extracted, c(names(grd), ".extracts", ".config"))
+  expect_true(all(vapply(extracted$.extracts, is_workflow, logical(1))))
+
+  # TODO more tests can be added when calibration method = "none" is implemented
+})
+
+test_that("verifying loop_over_all_stages, submodels, post estimation with tuning", {
+  # With the identity function as `control$extract`, the result should gain an
+  # `.extracts` tibble holding one workflow per grid candidate.
+  skip_if_not_installed("modeldata")
+  skip_if_not_installed("kknn")
+  skip_if_not_installed("probably")
+  skip_if_not_installed("mgcv")
+
+  load(system.file(
+    "regression_tests",
+    "submodel_example.RData",
+    package = "tune"
+  ))
+
+  # ------------------------------------------------------------------------------
+
+  set.seed(1)
+  dat <- modeldata::sim_regression(1000)
+  rs <- vfold_cv(dat)
+
+  rs_split <- rs$splits[[1]]
+  rs_args <- rsample::.get_split_args(rs)
+
+  rs_iter <- tune:::vec_list_rowwise(rs) |>
+    purrr::pluck(1)
+
+  # ------------------------------------------------------------------------------
+
+  rec <- recipe(outcome ~ ., data = dat) |>
+    step_pca(all_numeric_predictors(), num_comp = tune())
+
+  mod <- nearest_neighbor(neighbors = tune("k"), weight_func = tune()) |>
+    set_mode("regression")
+
+  submodel_wflow <- workflow(rec, mod, reg_cal_max)
+  max_param <-
+    submodel_wflow |>
+    extract_parameter_set_dials() |>
+    update(upper_limit = upper_limit(c(0, 1)))
+
+  upper_vals <- c(0, 1)
+
+  # fmt: skip
+  submodel_grid <-
+    tibble::tribble(
+      ~k,   ~weight_func, ~num_comp,
+      9L,  "rectangular",        2L,
+      14L,  "rectangular",        2L,
+      20L,  "rectangular",        2L,
+      4L,   "triangular",        2L,
+      9L,   "triangular",        2L,
+      14L,   "triangular",        2L,
+      20L,   "triangular",        2L,
+      4L, "epanechnikov",        2L,
+      9L, "epanechnikov",        2L,
+      14L, "epanechnikov",        2L,
+      20L, "epanechnikov",        2L,
+      4L,  "rectangular",       10L,
+      9L,  "rectangular",       10L,
+      14L,  "rectangular",       10L,
+      20L,  "rectangular",       10L,
+      4L,   "triangular",       10L,
+      9L,   "triangular",       10L,
+      14L,   "triangular",       10L,
+      20L,   "triangular",       10L,
+      4L, "epanechnikov",       10L,
+      9L, "epanechnikov",       10L,
+      14L, "epanechnikov",       10L,
+      20L, "epanechnikov",       10L
+    ) |>
+    tidyr::crossing(upper_limit = upper_vals)
+
+  ext_1 <- function(x) x
+
+  # ------------------------------------------------------------------------------
+
+  static_1 <- tune:::make_static(
+    submodel_wflow,
+    param_info = max_param,
+    metrics = metric_set(rmse),
+    eval_time = NULL,
+    split_args = rs_args,
+    control = tune::control_grid(save_pred = TRUE, extract = ext_1)
+  )
+
+  data_1 <- tune:::get_data_subsets(submodel_wflow, rs_split, rs_args)
+  static_1 <- tune:::update_static(static_1, data_1)
+  static_1$y_name <- "outcome"
+
+  submodel_res <- tune:::loop_over_all_stages(rs_iter, submodel_grid, static_1)
+  expect_false(is.null(submodel_res$.metrics[[1]]))
+  expect_named(
+    submodel_res,
+    c(".metrics", ".notes", ".extracts", "id", ".predictions")
+  )
+  expect_identical(nrow(submodel_res), 1L)
+  expect_equal(
+    nrow(submodel_res$.predictions[[1]]),
+    nrow(data_1$pred$data) * nrow(submodel_grid)
+  )
+
+  extracted <- submodel_res$.extracts[[1]]
+  expect_identical(nrow(extracted), nrow(submodel_grid))
+  expect_named(extracted, c(names(submodel_grid), ".extracts", ".config"))
+  expect_true(all(vapply(extracted$.extracts, is_workflow, logical(1))))
+})
+
+test_that("verifying loop_over_all_stages, submodels only, post estimation with tuning", {
+  # With the identity function as `control$extract`, the result should gain an
+  # `.extracts` tibble holding one workflow per grid candidate.
+  skip_if_not_installed("modeldata")
+  skip_if_not_installed("kknn")
+  skip_if_not_installed("probably")
+  skip_if_not_installed("mgcv")
+
+  load(system.file(
+    "regression_tests",
+    "submodel_only_example.RData",
+    package = "tune"
+  ))
+
+  # ------------------------------------------------------------------------------
+
+  set.seed(1)
+  dat <- modeldata::sim_classification(1000)
+  rs <- vfold_cv(dat)
+
+  rs_split <- rs$splits[[1]]
+  rs_args <- rsample::.get_split_args(rs)
+
+  rs_iter <- tune:::vec_list_rowwise(rs) |> purrr::pluck(1)
+
+  # ------------------------------------------------------------------------------
+
+  mod <- nearest_neighbor(neighbors = tune(), weight_func = "triangular") |>
+    set_mode("classification")
+
+  submodel_only_wflow <- workflow(class ~ ., mod, cls_cal_tune_post)
+  cut_vals <- c(.1, .9)
+  submodel_only_grid <- tidyr::crossing(neighbors = 3:10, cut = cut_vals)
+
+  ext_1 <- function(x) x
+
+  # ------------------------------------------------------------------------------
+
+  static_1 <- tune:::make_static(
+    submodel_only_wflow,
+    param_info = submodel_only_wflow |> extract_parameter_set_dials(),
+    metrics = metric_set(accuracy, roc_auc, brier_class),
+    eval_time = NULL,
+    split_args = rs_args,
+    control = tune::control_grid(save_pred = TRUE, extract = ext_1)
+  )
+
+  data_1 <- tune:::get_data_subsets(submodel_only_wflow, rs_split, rs_args)
+  static_1 <- tune:::update_static(static_1, data_1)
+  static_1$y_name <- "class"
+
+  submodel_only_res <-
+    tune:::loop_over_all_stages(rs_iter, submodel_only_grid, static_1)
+  expect_false(is.null(submodel_only_res$.metrics[[1]]))
+  expect_named(
+    submodel_only_res,
+    c(".metrics", ".notes", ".extracts", "id", ".predictions")
+  )
+  expect_identical(nrow(submodel_only_res), 1L)
+  expect_equal(
+    nrow(submodel_only_res$.predictions[[1]]),
+    nrow(data_1$pred$data) * nrow(submodel_only_grid)
+  )
+
+  extracted <- submodel_only_res$.extracts[[1]]
+  expect_identical(nrow(extracted), nrow(submodel_only_grid))
+  expect_named(extracted, c(names(submodel_only_grid), ".extracts", ".config"))
+  expect_true(all(vapply(extracted$.extracts, is_workflow, logical(1))))
+
+  # TODO more tests can be added when calibration method = "none" is implemented
+})