tidymodels · topepo · Jun 7, 2020 · May 27, 2020 · May 27, 2020 · May 28, 2020
diff --git a/NAMESPACE b/NAMESPACE
@@ -100,7 +100,6 @@ export(boost_tree)
 export(check_empty_ellipse)
 export(check_final_param)
 export(control_parsnip)
-export(convert_args)
 export(convert_stan_interval)
 export(decision_tree)
 export(eval_args)

diff --git a/R/aaa.R b/R/aaa.R
@@ -30,33 +30,6 @@ convert_stan_interval <- function(x, level = 0.95, lower = TRUE) {
   res
 }
 
-#' Make a table of arguments
-#' @param model_name A character string for the model
-#' @keywords internal
-#' @export
-convert_args <- function(model_name) {
-  envir <- get_model_env()
-
-  args <-
-    ls(envir) %>%
-    tibble::tibble(name = .) %>%
-    dplyr::filter(grepl("args", name)) %>%
-    dplyr::mutate(model = sub("_args", "", name),
-                  args  = purrr::map(name, ~envir[[.x]])) %>%
-    tidyr::unnest(args) %>%
-    dplyr::select(model:original)
-
-  convert_df <- args %>%
-    dplyr::filter(grepl(model_name, model)) %>%
-    dplyr::select(-model) %>%
-    tidyr::pivot_wider(names_from = engine, values_from = original)
-
-  convert_df %>%
-    knitr::kable(col.names = paste0("**", colnames(convert_df), "**"))
-
-}
-
-
 # ------------------------------------------------------------------------------
 # nocov
 

diff --git a/R/boost_tree_data.R b/R/boost_tree_data.R
@@ -317,8 +317,8 @@ set_model_arg(
 set_model_arg(
   model = "boost_tree",
   eng = "spark",
-  parsnip = "min_info_gain",
-  original = "loss_reduction",
+  parsnip = "loss_reduction",  # parsnip-side name; same as the dials function named in `func` below
+  original = "min_info_gain",  # engine-side name for eng = "spark"
   func = list(pkg = "dials", fun = "loss_reduction"),
   has_submodel = FALSE
 )

diff --git a/man/boost_tree.Rd b/man/boost_tree.Rd
diff --git a/man/convert_args.Rd b/man/convert_args.Rd
diff --git a/man/decision_tree.Rd b/man/decision_tree.Rd
diff --git a/man/linear_reg.Rd b/man/linear_reg.Rd
diff --git a/man/logistic_reg.Rd b/man/logistic_reg.Rd
diff --git a/man/mars.Rd b/man/mars.Rd
diff --git a/man/mlp.Rd b/man/mlp.Rd
diff --git a/man/multinom_reg.Rd b/man/multinom_reg.Rd
diff --git a/man/nearest_neighbor.Rd b/man/nearest_neighbor.Rd
diff --git a/man/rand_forest.Rd b/man/rand_forest.Rd
diff --git a/man/rmd/boost-tree.Rmd b/man/rmd/boost-tree.Rmd
@@ -1,5 +1,8 @@
 # Engine Details
 
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
 Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below:
 
 ## xgboost
@@ -50,9 +53,33 @@ boost_tree() %>%
 
 ## Parameter translations
 
-The standardized parameter names in parsnip can be mapped to their original names in each engine that has main parameters:
+The standardized parameter names in parsnip can be mapped to their original names in each engine that has main parameters. Each engine typically has a different default value (shown in parentheses) for each parameter.
 
 ```{r echo = FALSE, results = "asis"}
-parsnip::convert_args("boost_tree")
+get_defaults_boost_tree <- function() {  # Returns the boost_tree translation table: one row per engine/argument pair.
+  tibble::tribble(  # get_arg() is not defined in this chunk; presumably from the setup.Rmd child and called as (package, function, argument) -- TODO confirm
+    ~model,         ~engine,          ~parsnip,                 ~original,  ~default,
+    "boost_tree", "xgboost",      "tree_depth",                "max_depth", get_arg("parsnip", "xgb_train", "max_depth"),  # xgboost rows look up parsnip's xgb_train, not a function in xgboost
+    "boost_tree", "xgboost",           "trees",                 "nrounds",  get_arg("parsnip", "xgb_train", "nrounds"),
+    "boost_tree", "xgboost",      "learn_rate",                     "eta",  get_arg("parsnip", "xgb_train", "eta"),
+    "boost_tree", "xgboost",            "mtry",        "colsample_bytree",  get_arg("parsnip", "xgb_train", "colsample_bytree"),
+    "boost_tree", "xgboost",           "min_n",        "min_child_weight",  get_arg("parsnip", "xgb_train", "min_child_weight"),
+    "boost_tree", "xgboost",  "loss_reduction",                   "gamma",  get_arg("parsnip", "xgb_train", "gamma"),
+    "boost_tree", "xgboost",     "sample_size",               "subsample",  get_arg("parsnip", "xgb_train", "subsample"),
+    "boost_tree",    "C5.0",           "trees",                "trials",    get_arg("parsnip", "C5.0_train", "trials"),  # trials is looked up on parsnip's C5.0_train; the other C5.0 rows use C50's C5.0Control
+    "boost_tree",    "C5.0",           "min_n",                "minCases",  get_arg("C50", "C5.0Control", "minCases"),
+    "boost_tree",    "C5.0",     "sample_size",                  "sample",  get_arg("C50", "C5.0Control", "sample"),
+    "boost_tree", "spark",        "tree_depth",               "max_depth",  get_arg("sparklyr", "ml_gradient_boosted_trees", "max_depth"),
+    "boost_tree", "spark",             "trees",                "max_iter",  get_arg("sparklyr", "ml_gradient_boosted_trees", "max_iter"),
+    "boost_tree", "spark",        "learn_rate",               "step_size",  get_arg("sparklyr", "ml_gradient_boosted_trees", "step_size"),
+    "boost_tree", "spark",              "mtry", "feature_subset_strategy",  "see below",  # literal text instead of a get_arg() lookup; the prose after this chunk describes the spark mtry default
+    "boost_tree", "spark",             "min_n",  "min_instances_per_node",  get_arg("sparklyr", "ml_gradient_boosted_trees", "min_instances_per_node"),
+    "boost_tree", "spark",    "loss_reduction",        "min_info_gain",     get_arg("sparklyr", "ml_gradient_boosted_trees", "min_info_gain"),
+    "boost_tree", "spark",       "sample_size",        "subsampling_rate",  get_arg("sparklyr", "ml_gradient_boosted_trees", "subsampling_rate"),
+
+  )
+}
+convert_args("boost_tree")  # convert_args() is not defined in this chunk; presumably it consumes get_defaults_boost_tree() to render the table -- TODO confirm
 ```
 
+For spark, the default `mtry` is the square root of the number of predictors for classification, and one-third of the predictors for regression.
diff --git a/man/rmd/decision-tree.Rmd b/man/rmd/decision-tree.Rmd
@@ -1,5 +1,8 @@
 # Engine Details
 
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
 Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below:
 
 ## rpart
@@ -52,9 +55,20 @@ decision_tree() %>%
 
 ## Parameter translations
 
-The standardized parameter names in parsnip can be mapped to their original names in each engine that has main parameters:
+The standardized parameter names in parsnip can be mapped to their original names in each engine that has main parameters. Each engine typically has a different default value (shown in parentheses) for each parameter.
 
 ```{r echo = FALSE, results = "asis"}
-parsnip::convert_args("decision_tree")
+get_defaults_decision_tree <- function() {  # Returns the decision_tree translation table: one row per engine/argument pair.
+  tibble::tribble(  # get_arg() is not defined in this chunk; presumably from the setup.Rmd child and called as (package, function, argument) -- TODO confirm
+    ~model,         ~engine,          ~parsnip,                ~original,  ~default,
+    "decision_tree", "rpart",      "tree_depth",               "maxdepth", get_arg("rpart", "rpart.control", "maxdepth"),  # rpart rows are looked up on rpart.control
+    "decision_tree", "rpart",           "min_n",               "minsplit", get_arg("rpart", "rpart.control", "minsplit"),
+    "decision_tree", "rpart", "cost_complexity",                     "cp", get_arg("rpart", "rpart.control", "cp"),
+    "decision_tree",  "C5.0",           "min_n",               "minCases", get_arg("C50", "C5.0Control", "minCases"),  # the only C5.0 row in this table
+    "decision_tree", "spark",      "tree_depth",              "max_depth", get_arg("sparklyr", "ml_decision_tree", "max_depth"),
+    "decision_tree", "spark",           "min_n", "min_instances_per_node", get_arg("sparklyr", "ml_decision_tree", "min_instances_per_node"),
+  )
+}
+convert_args("decision_tree")  # convert_args() is not defined in this chunk; presumably it consumes get_defaults_decision_tree() to render the table -- TODO confirm
 ```
 
diff --git a/man/rmd/linear-reg.Rmd b/man/rmd/linear-reg.Rmd
@@ -1,5 +1,8 @@
 # Engine Details
 
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
 Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below.
 
 ## lm
@@ -68,10 +71,20 @@ linear_reg() %>%
 
 ## Parameter translations
 
-The standardized parameter names in parsnip can be mapped to their original names
-in each engine that has main parameters:
+The standardized parameter names in parsnip can be mapped to their original 
+names in each engine that has main parameters. Each engine typically has a 
+different default value (shown in parentheses) for each parameter.
 
 ```{r echo = FALSE, results = "asis"}
-parsnip::convert_args("linear_reg")
+get_defaults_linear_reg <- function() {  # Returns the linear_reg translation table: one row per engine/argument pair.
+  tibble::tribble(  # get_arg() is not defined in this chunk; presumably from the setup.Rmd child and called as (package, function, argument) -- TODO confirm
+    ~model,         ~engine,     ~parsnip,            ~original,  ~default,
+    "linear_reg",  "glmnet",    "mixture",              "alpha",  get_arg("glmnet", "glmnet", "alpha"),  # NOTE(review): glmnet has only a mixture row here; confirm the missing penalty row is intentional
+    "linear_reg",  "spark",     "penalty",          "reg_param",  get_arg("sparklyr", "ml_linear_regression", "reg_param"),
+    "linear_reg",  "spark",     "mixture",  "elastic_net_param",  get_arg("sparklyr", "ml_linear_regression", "elastic_net_param"),
+    "linear_reg",  "keras",     "penalty",            "penalty",  get_arg("parsnip", "keras_mlp", "penalty"),  # keras row is looked up on parsnip's keras_mlp; parsnip and original names are identical
+  )
+}
+convert_args("linear_reg")  # convert_args() is not defined in this chunk; presumably it consumes get_defaults_linear_reg() to render the table -- TODO confirm
 ```