From 31facb40020a89cb9dafee891f5c62aeb66862db Mon Sep 17 00:00:00 2001
From: david-cortes <david.cortes.rivera@gmail.com>
Date: Wed, 23 Feb 2022 15:24:35 +0200
Subject: [PATCH] [R-package] Promote `objective` and `init_score` to top-level
 arguments in `lightgbm()` (#4976)

* promote objective and init_score to top-level arguments

* follow comments

* Update R-package/R/lightgbm.R

Co-authored-by: James Lamb <jaylamb20@gmail.com>

* update docs

* linter

* comments

* comments

* comments

* extend test for default objective

* Update R-package/tests/testthat/test_basic.R

Co-authored-by: James Lamb <jaylamb20@gmail.com>
---
 R-package/R/lightgbm.R                |  10 ++-
 R-package/man/lightgbm.Rd             |   9 +++
 R-package/tests/testthat/test_basic.R | 105 ++++++++++++++++++++++++++
 3 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R
index f96b868b13cd..605f677f19b0 100644
--- a/R-package/R/lightgbm.R
+++ b/R-package/R/lightgbm.R
@@ -92,6 +92,11 @@ NULL
 #' @inheritParams lgb_shared_params
 #' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
 #' @param weight vector of response values. If not NULL, will set to dataset
+#' @param objective Optimization objective (e.g. \code{"regression"}, \code{"binary"}, etc.).
+#'                  For a list of accepted objectives, see
+#'                  \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
+#'                  the "Parameters" section of the documentation}.
+#' @param init_score initial score is the base prediction lightgbm will boost from
 #' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example
 #'     \itemize{
 #'        \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
@@ -121,6 +126,8 @@ lightgbm <- function(data,
                      init_model = NULL,
                      callbacks = list(),
                      serializable = TRUE,
+                     objective = "regression",
+                     init_score = NULL,
                      ...) {
 
   # validate inputs early to avoid unnecessary computation
@@ -133,13 +140,14 @@ lightgbm <- function(data,
 
   # Check whether data is lgb.Dataset, if not then create lgb.Dataset manually
   if (!lgb.is.Dataset(x = dtrain)) {
-    dtrain <- lgb.Dataset(data = data, label = label, weight = weight)
+    dtrain <- lgb.Dataset(data = data, label = label, weight = weight, init_score = init_score)
   }
 
   train_args <- list(
     "params" = params
     , "data" = dtrain
     , "nrounds" = nrounds
+    , "obj" = objective
     , "verbose" = verbose
     , "eval_freq" = eval_freq
     , "early_stopping_rounds" = early_stopping_rounds
diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd
index ab57baedbd6e..96beee791c00 100644
--- a/R-package/man/lightgbm.Rd
+++ b/R-package/man/lightgbm.Rd
@@ -16,6 +16,8 @@ lightgbm(
   init_model = NULL,
   callbacks = list(),
   serializable = TRUE,
+  objective = "regression",
+  init_score = NULL,
   ...
 )
 }
@@ -50,6 +52,13 @@ set to the iteration number of the best iteration.}
 \item{serializable}{whether to make the resulting objects serializable through functions such as
 \code{save} or \code{saveRDS} (see section "Model serialization").}
 
+\item{objective}{Optimization objective (e.g. \code{"regression"}, \code{"binary"}, etc.).
+For a list of accepted objectives, see
+\href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{
+the "Parameters" section of the documentation}.}
+
+\item{init_score}{initial score is the base prediction lightgbm will boost from}
+
 \item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example
 \itemize{
    \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index ab5accab6144..8ba2b08abec5 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -2812,3 +2812,108 @@ for (x3_to_categorical in c(TRUE, FALSE)) {
     })
   }
 }
+
+test_that("lightgbm() accepts objective as function argument and under params", {
+  bst1 <- lightgbm(
+    data = train$data
+    , label = train$label
+    , params = list(objective = "regression_l1")
+    , nrounds = 5L
+    , verbose = -1L
+  )
+  expect_equal(bst1$params$objective, "regression_l1")
+  model_txt_lines <- strsplit(
+    x = bst1$save_model_to_string()
+    , split = "\n"
+  )[[1L]]
+  expect_true(any(model_txt_lines == "objective=regression_l1"))
+  expect_false(any(model_txt_lines == "objective=regression_l2"))
+
+  bst2 <- lightgbm(
+    data = train$data
+    , label = train$label
+    , objective = "regression_l1"
+    , nrounds = 5L
+    , verbose = -1L
+  )
+  expect_equal(bst2$params$objective, "regression_l1")
+  model_txt_lines <- strsplit(
+    x = bst2$save_model_to_string()
+    , split = "\n"
+  )[[1L]]
+  expect_true(any(model_txt_lines == "objective=regression_l1"))
+  expect_false(any(model_txt_lines == "objective=regression_l2"))
+})
+
+test_that("lightgbm() prioritizes objective under params over objective as function argument", {
+  bst1 <- lightgbm(
+    data = train$data
+    , label = train$label
+    , objective = "regression"
+    , params = list(objective = "regression_l1")
+    , nrounds = 5L
+    , verbose = -1L
+  )
+  expect_equal(bst1$params$objective, "regression_l1")
+  model_txt_lines <- strsplit(
+    x = bst1$save_model_to_string()
+    , split = "\n"
+  )[[1L]]
+  expect_true(any(model_txt_lines == "objective=regression_l1"))
+  expect_false(any(model_txt_lines == "objective=regression_l2"))
+
+  bst2 <- lightgbm(
+    data = train$data
+    , label = train$label
+    , objective = "regression"
+    , params = list(loss = "regression_l1")
+    , nrounds = 5L
+    , verbose = -1L
+  )
+  expect_equal(bst2$params$objective, "regression_l1")
+  model_txt_lines <- strsplit(
+    x = bst2$save_model_to_string()
+    , split = "\n"
+  )[[1L]]
+  expect_true(any(model_txt_lines == "objective=regression_l1"))
+  expect_false(any(model_txt_lines == "objective=regression_l2"))
+})
+
+test_that("lightgbm() accepts init_score as function argument", {
+  bst1 <- lightgbm(
+    data = train$data
+    , label = train$label
+    , objective = "binary"
+    , nrounds = 5L
+    , verbose = -1L
+  )
+  pred1 <- predict(bst1, train$data, rawscore = TRUE)
+
+  bst2 <- lightgbm(
+    data = train$data
+    , label = train$label
+    , init_score = pred1
+    , objective = "binary"
+    , nrounds = 5L
+    , verbose = -1L
+  )
+  pred2 <- predict(bst2, train$data, rawscore = TRUE)
+
+  expect_true(any(pred1 != pred2))
+})
+
+test_that("lightgbm() defaults to 'regression' objective if objective not otherwise provided", {
+  bst <- lightgbm(
+    data = train$data
+    , label = train$label
+    , nrounds = 5L
+    , verbose = -1L
+  )
+  expect_equal(bst$params$objective, "regression")
+  model_txt_lines <- strsplit(
+    x = bst$save_model_to_string()
+    , split = "\n"
+  )[[1L]]
+  expect_true(any(model_txt_lines == "objective=regression"))
+  expect_false(any(model_txt_lines == "objective=regression_l1"))
+})