From 31facb40020a89cb9dafee891f5c62aeb66862db Mon Sep 17 00:00:00 2001 From: david-cortes Date: Wed, 23 Feb 2022 15:24:35 +0200 Subject: [PATCH] [R-package] Promote `objective` and `init_score` to top-level arguments in `lightgbm()` (#4976) * promote objective and init_score to top-level arguments * follow comments * Update R-package/R/lightgbm.R Co-authored-by: James Lamb * update docs * linter * comments * comments * comments * extend test for default objective * Update R-package/tests/testthat/test_basic.R Co-authored-by: James Lamb --- R-package/R/lightgbm.R | 10 ++- R-package/man/lightgbm.Rd | 9 +++ R-package/tests/testthat/test_basic.R | 105 ++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 1 deletion(-) diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index f96b868b13cd..605f677f19b0 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -92,6 +92,11 @@ NULL #' @inheritParams lgb_shared_params #' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}} #' @param weight vector of response values. If not NULL, will set to dataset +#' @param objective Optimization objective (e.g. `"regression"`, `"binary"`, etc.). +#' For a list of accepted objectives, see +#' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{ +#' the "Parameters" section of the documentation}. +#' @param init_score initial score is the base prediction lightgbm will boost from #' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example #' \itemize{ #' \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation} @@ -121,6 +126,8 @@ lightgbm <- function(data, init_model = NULL, callbacks = list(), serializable = TRUE, + objective = "regression", + init_score = NULL, ...) { # validate inputs early to avoid unnecessary computation @@ -133,13 +140,14 @@ lightgbm <- function(data, # Check whether data is lgb.Dataset, if not then create lgb.Dataset manually if (!lgb.is.Dataset(x = dtrain)) { - dtrain <- lgb.Dataset(data = data, label = label, weight = weight) + dtrain <- lgb.Dataset(data = data, label = label, weight = weight, init_score = init_score) } train_args <- list( "params" = params , "data" = dtrain , "nrounds" = nrounds + , "obj" = objective , "verbose" = verbose , "eval_freq" = eval_freq , "early_stopping_rounds" = early_stopping_rounds diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index ab57baedbd6e..96beee791c00 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -16,6 +16,8 @@ lightgbm( init_model = NULL, callbacks = list(), serializable = TRUE, + objective = "regression", + init_score = NULL, ... ) } @@ -50,6 +52,13 @@ set to the iteration number of the best iteration.} \item{serializable}{whether to make the resulting objects serializable through functions such as \code{save} or \code{saveRDS} (see section "Model serialization").} +\item{objective}{Optimization objective (e.g. `"regression"`, `"binary"`, etc.). +For a list of accepted objectives, see +\href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{ +the "Parameters" section of the documentation}.} + +\item{init_score}{initial score is the base prediction lightgbm will boost from} + \item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example \itemize{ \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation} diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index ab5accab6144..8ba2b08abec5 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -2812,3 +2812,108 @@ for (x3_to_categorical in c(TRUE, FALSE)) { }) } } + +test_that("lightgbm() accepts objective as function argument and under params", { + bst1 <- lightgbm( + data = train$data + , label = train$label + , params = list(objective = "regression_l1") + , nrounds = 5L + , verbose = -1L + ) + expect_equal(bst1$params$objective, "regression_l1") + model_txt_lines <- strsplit( + x = bst1$save_model_to_string() + , split = "\n" + )[[1L]] + expect_true(any(model_txt_lines == "objective=regression_l1")) + expect_false(any(model_txt_lines == "objective=regression_l2")) + + bst2 <- lightgbm( + data = train$data + , label = train$label + , objective = "regression_l1" + , nrounds = 5L + , verbose = -1L + ) + expect_equal(bst2$params$objective, "regression_l1") + model_txt_lines <- strsplit( + x = bst2$save_model_to_string() + , split = "\n" + )[[1L]] + expect_true(any(model_txt_lines == "objective=regression_l1")) + expect_false(any(model_txt_lines == "objective=regression_l2")) +}) + +test_that("lightgbm() prioritizes objective under params over objective as function argument", { + bst1 <- lightgbm( + data = train$data + , label = train$label + , objective = "regression" + , params = list(objective = "regression_l1") + , nrounds = 5L + , verbose = -1L + ) + expect_equal(bst1$params$objective, "regression_l1") + model_txt_lines <- strsplit( + x = bst1$save_model_to_string() + , split = "\n" + )[[1L]] + expect_true(any(model_txt_lines == "objective=regression_l1")) + expect_false(any(model_txt_lines == "objective=regression_l2")) + + bst2 <- lightgbm( + data = train$data + , label = train$label + , objective = "regression" + , params = list(loss = "regression_l1") + , nrounds = 5L + , verbose = -1L + ) + expect_equal(bst2$params$objective, "regression_l1") + model_txt_lines <- strsplit( + x = bst2$save_model_to_string() + , split = "\n" + )[[1L]] + expect_true(any(model_txt_lines == "objective=regression_l1")) + expect_false(any(model_txt_lines == "objective=regression_l2")) +}) + +test_that("lightgbm() accepts init_score as function argument", { + bst1 <- lightgbm( + data = train$data + , label = train$label + , objective = "binary" + , nrounds = 5L + , verbose = -1L + ) + pred1 <- predict(bst1, train$data, rawscore = TRUE) + + bst2 <- lightgbm( + data = train$data + , label = train$label + , init_score = pred1 + , objective = "binary" + , nrounds = 5L + , verbose = -1L + ) + pred2 <- predict(bst2, train$data, rawscore = TRUE) + + expect_true(any(pred1 != pred2)) +}) + +test_that("lightgbm() defaults to 'regression' objective if objective not otherwise provided", { + bst <- lightgbm( + data = train$data + , label = train$label + , nrounds = 5L + , verbose = -1L + ) + expect_equal(bst$params$objective, "regression") + model_txt_lines <- strsplit( + x = bst$save_model_to_string() + , split = "\n" + )[[1L]] + expect_true(any(model_txt_lines == "objective=regression")) + expect_false(any(model_txt_lines == "objective=regression_l1")) +})