diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R
index 13685e7f2204..c22d0ea848bb 100644
--- a/R-package/R/lgb.cv.R
+++ b/R-package/R/lgb.cv.R
@@ -25,8 +25,8 @@ CVBooster <- R6::R6Class(
 #' @description Cross validation logic used by LightGBM
 #' @inheritParams lgb_shared_params
 #' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
-#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
-#' @param weight vector of response values. If not NULL, will set to dataset
+#' @param label Deprecated. See "Deprecated Arguments" section below.
+#' @param weight Deprecated. See "Deprecated Arguments" section below.
 #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
 #' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
 #'               This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
@@ -36,10 +36,8 @@ CVBooster <- R6::R6Class(
 #' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
 #'              (each element must be a vector of test fold's indices). When folds are supplied,
 #'              the \code{nfold} and \code{stratified} parameters are ignored.
-#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
-#' @param categorical_feature categorical features. This can either be a character vector of feature
-#'                            names or an integer vector with the indices of the features (e.g.
-#'                            \code{c(1L, 10L)} to say "the first and tenth columns").
+#' @param colnames Deprecated. See "Deprecated Arguments" section below.
+#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
 #' @param callbacks List of callback functions that are applied at each iteration.
 #' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
 #'                   into a predictor model which frees up memory and the original datasets
@@ -70,6 +68,13 @@ CVBooster <- R6::R6Class(
 #'   , nfold = 3L
 #' )
 #' }
+#'
+#' @section Deprecated Arguments:
+#'
+#' A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
+#' to argument \code{'data'}. It will also remove support for passing arguments
+#' \code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
+#'
 #' @importFrom data.table data.table setorderv
 #' @export
 lgb.cv <- function(params = list()
@@ -102,12 +107,32 @@ lgb.cv <- function(params = list()
 
   # If 'data' is not an lgb.Dataset, try to construct one using 'label'
   if (!.is_Dataset(x = data)) {
+    warning(paste0(
+      "Passing anything other than an lgb.Dataset object to lgb.cv() is deprecated. "
+      , "Either pass an lgb.Dataset object, or use lightgbm()."
+    ))
     if (is.null(label)) {
       stop("'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'")
     }
     data <- lgb.Dataset(data = data, label = label)
   }
 
+  # raise deprecation warnings if necessary
+  # ref: https://github.com/microsoft/LightGBM/issues/6435
+  args <- names(match.call())
+  if ("categorical_feature" %in% args) {
+    .emit_dataset_kwarg_warning(calling_function = "lgb.cv", argname = "categorical_feature")
+  }
+  if ("colnames" %in% args) {
+    .emit_dataset_kwarg_warning(calling_function = "lgb.cv", argname = "colnames")
+  }
+  if ("label" %in% args) {
+    .emit_dataset_kwarg_warning(calling_function = "lgb.cv", argname = "label")
+  }
+  if ("weight" %in% args) {
+    .emit_dataset_kwarg_warning(calling_function = "lgb.cv", argname = "weight")
+  }
+
   # set some parameters, resolving the way they were passed in with other parameters
   # in `params`.
   # this ensures that the model stored with Booster$save() correctly represents
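
The Python package deprecates the same pattern for cv() (see the engine.py changes below). For reference, a minimal sketch of the new-style call in Python; the synthetic data and parameter values are illustrative only:

import numpy as np
import lightgbm as lgb

# illustrative synthetic data
rng = np.random.default_rng(42)
X = rng.normal(size=(100, 4))
y = rng.normal(size=100)

# new style: label and weight live on the Dataset itself...
dtrain = lgb.Dataset(X, label=y, weight=np.ones(100))

# ...so cv() needs nothing beyond params, the Dataset, and the CV settings
cv_results = lgb.cv(
    params={"objective": "regression", "verbosity": -1},
    train_set=dtrain,
    num_boost_round=5,
    nfold=3,
)
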
diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R
index 8a299fb6b8ac..dafb4d83b66b 100644
--- a/R-package/R/lgb.train.R
+++ b/R-package/R/lgb.train.R
@@ -6,10 +6,8 @@
 #' @inheritParams lgb_shared_params
 #' @param valids a list of \code{lgb.Dataset} objects, used for validation
 #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
-#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
-#' @param categorical_feature categorical features. This can either be a character vector of feature
-#'                            names or an integer vector with the indices of the features (e.g.
-#'                            \code{c(1L, 10L)} to say "the first and tenth columns").
+#' @param colnames Deprecated. See "Deprecated Arguments" section below.
+#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
 #' @param callbacks List of callback functions that are applied at each iteration.
 #' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the
 #'                   booster model into a predictor model which frees up memory and the
@@ -43,6 +41,13 @@
 #'   , early_stopping_rounds = 3L
 #' )
 #' }
+#'
+#' @section Deprecated Arguments:
+#'
+#' A future release of \code{lightgbm} will remove support for passing arguments
+#' \code{'categorical_feature'} and \code{'colnames'}. Pass these arguments to
+#' \code{lgb.Dataset} instead.
+#'
 #' @export
 lgb.train <- function(params = list(),
                       data,
@@ -78,6 +83,16 @@ lgb.train <- function(params = list(),
     }
   }
 
+  # raise deprecation warnings if necessary
+  # ref: https://github.com/microsoft/LightGBM/issues/6435
+  args <- names(match.call())
+  if ("categorical_feature" %in% args) {
+    .emit_dataset_kwarg_warning(calling_function = "lgb.train", argname = "categorical_feature")
+  }
+  if ("colnames" %in% args) {
+    .emit_dataset_kwarg_warning(calling_function = "lgb.train", argname = "colnames")
+  }
+
   # set some parameters, resolving the way they were passed in with other parameters
   # in `params`.
   # this ensures that the model stored with Booster$save() correctly represents
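
On the Python side, the matching migration for train() moves feature_name and categorical_feature onto the Dataset. A minimal sketch, with illustrative names and synthetic data:

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(0)
X = rng.integers(0, 5, size=(100, 3)).astype(np.float64)
y = rng.normal(size=100)
names = ["f0", "f1", "f2"]

# deprecated: lgb.train(params, lgb.Dataset(X, y), feature_name=names, categorical_feature=[0])
# recommended: attach the feature metadata to the Dataset instead
dtrain = lgb.Dataset(X, label=y, feature_name=names, categorical_feature=[0])
bst = lgb.train({"objective": "regression", "verbosity": -1}, dtrain, num_boost_round=5)
assert bst.feature_name() == names
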
diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R
index f1a0090f950a..efa593ffe12f 100644
--- a/R-package/R/lightgbm.R
+++ b/R-package/R/lightgbm.R
@@ -144,6 +144,12 @@ NULL
 #'
 #' \emph{New in version 4.0.0}
 #'
+#' @param colnames Character vector of features. Only used if \code{data} is not an \code{\link{lgb.Dataset}}.
+#' @param categorical_feature categorical features. This can either be a character vector of feature
+#'                            names or an integer vector with the indices of the features (e.g.
+#'                            \code{c(1L, 10L)} to say "the first and tenth columns").
+#'                            Only used if \code{data} is not an \code{\link{lgb.Dataset}}.
+#'
 #' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example
 #'     \itemize{
 #'        \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
@@ -152,10 +158,6 @@ NULL
 #'            \code{binary}, \code{lambdarank}, \code{multiclass}}
 #'        \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
 #'        \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
-#'        \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
-#'        \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
-#'            names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
-#'            say "the first and tenth columns").}
 #'        \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
 #'            into a predictor model which frees up memory and the original datasets}
 #'     }
@@ -176,6 +178,8 @@ lightgbm <- function(data,
                      objective = "auto",
                      init_score = NULL,
                      num_threads = NULL,
+                     colnames = NULL,
+                     categorical_feature = NULL,
                      ...) {
 
   # validate inputs early to avoid unnecessary computation
@@ -221,7 +225,14 @@ lightgbm <- function(data,
 
   # Check whether data is lgb.Dataset, if not then create lgb.Dataset manually
   if (!.is_Dataset(x = dtrain)) {
-    dtrain <- lgb.Dataset(data = data, label = label, weight = weights, init_score = init_score)
+    dtrain <- lgb.Dataset(
+      data = data
+      , label = label
+      , weight = weights
+      , init_score = init_score
+      , categorical_feature = categorical_feature
+      , colnames = colnames
+    )
   }
 
   train_args <- list(
diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index 1ac6f197ca77..646a306c97f6 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -260,3 +260,19 @@
     return(a == b)
   }
 }
+
+# ref: https://github.com/microsoft/LightGBM/issues/6435
+.emit_dataset_kwarg_warning <- function(calling_function, argname) {
+  msg <- sprintf(
+    paste0(
+      "Argument '%s' to %s() is deprecated and will be removed in a future release. "
+      , "Set '%s' with lgb.Dataset() instead. "
+      , "See https://github.com/microsoft/LightGBM/issues/6435."
+    )
+    , argname
+    , calling_function
+    , argname
+  )
+  warning(msg)
+  return(invisible(NULL))
+}
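
lightgbm() keeps accepting these arguments (while lgb.train() loses them) because it is the high-level entry point that constructs the Dataset internally. The Python package's equivalent path is the scikit-learn interface (see the sklearn.py hunk below), which behaves the same way. A minimal Python sketch of that path, with illustrative data and names:

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(1)
X = rng.integers(0, 4, size=(200, 5)).astype(np.float64)
y = rng.normal(size=200)
names = [f"c{i}" for i in range(5)]

# the scikit-learn interface forwards these into the Dataset it builds internally
reg = lgb.LGBMRegressor(n_estimators=5, verbosity=-1)
reg.fit(X, y, feature_name=names, categorical_feature=[0, 1])
assert reg.booster_.feature_name() == names
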
diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd
index 7ea2928c6166..cee059d494ca 100644
--- a/R-package/man/lgb.cv.Rd
+++ b/R-package/man/lgb.cv.Rd
@@ -41,9 +41,9 @@ may allow you to pass other types of data like \code{matrix} and then separately
 
 \item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
 
-\item{label}{Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}}
+\item{label}{Deprecated. See "Deprecated Arguments" section below.}
 
-\item{weight}{vector of response values. If not NULL, will set to dataset}
+\item{weight}{Deprecated. See "Deprecated Arguments" section below.}
 
 \item{obj}{objective function, can be character or custom objective function.
 Examples include \code{regression}, \code{regression_l1}, \code{huber},
@@ -103,11 +103,9 @@ the \code{nfold} and \code{stratified} parameters are ignored.}
 \item{init_model}{path of model file or \code{lgb.Booster} object,
 will continue training from this model}
 
-\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
+\item{colnames}{Deprecated. See "Deprecated Arguments" section below.}
 
-\item{categorical_feature}{categorical features. This can either be a character vector of feature
-names or an integer vector with the indices of the features (e.g.
-\code{c(1L, 10L)} to say "the first and tenth columns").}
+\item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.}
 
 \item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
 training will stop if the evaluation of any metric on any validation set
@@ -133,6 +131,14 @@ a trained model \code{lgb.CVBooster}.
 \description{
 Cross validation logic used by LightGBM
 }
+\section{Deprecated Arguments}{
+
+
+A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
+to argument \code{'data'}. It will also remove support for passing arguments
+\code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
+}
+
 \section{Early Stopping}{
 
@@ -171,4 +177,5 @@ model <- lgb.cv(
   , nfold = 3L
 )
 }
+
 }
diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd
index 557c85b7f9dc..ebbfc206998e 100644
--- a/R-package/man/lgb.train.Rd
+++ b/R-package/man/lgb.train.Rd
@@ -82,11 +82,9 @@ printing of evaluation during training}
 \item{init_model}{path of model file or \code{lgb.Booster} object,
 will continue training from this model}
 
-\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
+\item{colnames}{Deprecated. See "Deprecated Arguments" section below.}
 
-\item{categorical_feature}{categorical features. This can either be a character vector of feature
-names or an integer vector with the indices of the features (e.g.
-\code{c(1L, 10L)} to say "the first and tenth columns").}
+\item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.}
 
 \item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
 training will stop if the evaluation of any metric on any validation set
@@ -111,6 +109,14 @@ Low-level R interface to train a LightGBM model. Unlike \code{\link{lightgbm}},
 this function is focused on performance (e.g. speed, memory efficiency). It is also
 less likely to have breaking API changes in new releases than \code{\link{lightgbm}}.
 }
+\section{Deprecated Arguments}{
+
+
+A future release of \code{lightgbm} will remove support for passing arguments
+\code{'categorical_feature'} and \code{'colnames'}. Pass these arguments to
+\code{lgb.Dataset} instead.
+}
+
 \section{Early Stopping}{
 
@@ -154,4 +160,5 @@ model <- lgb.train(
   , early_stopping_rounds = 3L
 )
 }
+
 }
diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd
index 09d7704605c1..90cb3166bf5c 100644
--- a/R-package/man/lightgbm.Rd
+++ b/R-package/man/lightgbm.Rd
@@ -19,6 +19,8 @@ lightgbm(
   objective = "auto",
   init_score = NULL,
   num_threads = NULL,
+  colnames = NULL,
+  categorical_feature = NULL,
   ...
 )
 }
@@ -96,6 +98,13 @@ set to the iteration number of the best iteration.}
 
 \emph{New in version 4.0.0}}
 
+\item{colnames}{Character vector of features. Only used if \code{data} is not an \code{\link{lgb.Dataset}}.}
+
+\item{categorical_feature}{categorical features. This can either be a character vector of feature
+names or an integer vector with the indices of the features (e.g.
+\code{c(1L, 10L)} to say "the first and tenth columns").
+Only used if \code{data} is not an \code{\link{lgb.Dataset}}.}
+
 \item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example
 \itemize{
    \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
@@ -104,10 +113,6 @@ set to the iteration number of the best iteration.}
        \code{binary}, \code{lambdarank}, \code{multiclass}}
    \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
    \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
-   \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
-   \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
-       names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
-       say "the first and tenth columns").}
    \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
        into a predictor model which frees up memory and the original datasets}
 }}
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 75abd26dd152..74c46dcef141 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -433,7 +433,7 @@ test_that("lgb.cv() rejects negative or 0 value passed to nrounds", {
   }
 })
 
-test_that("lgb.cv() throws an informative error is 'data' is not an lgb.Dataset and labels are not given", {
+test_that("lgb.cv() throws an informative error if 'data' is not an lgb.Dataset and labels are not given", {
   bad_values <- list(
     4L
     , "hello"
@@ -1788,11 +1788,6 @@ test_that("lgb.train() works with early stopping for regression with a metric th
 
 test_that("lgb.train() supports non-ASCII feature names", {
-  dtrain <- lgb.Dataset(
-    data = matrix(rnorm(400L), ncol = 4L)
-    , label = rnorm(100L)
-    , params = list(num_threads = .LGB_MAX_THREADS)
-  )
   # content below is equivalent to
   #
   #   feature_names <- c("F_零", "F_一", "F_二", "F_三")
@@ -1805,6 +1800,12 @@ test_that("lgb.train() supports non-ASCII feature names", {
     , rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xba, 0x8c)))
     , rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xb8, 0x89)))
   )
+  dtrain <- lgb.Dataset(
+    data = matrix(rnorm(400L), ncol = 4L)
+    , label = rnorm(100L)
+    , params = list(num_threads = .LGB_MAX_THREADS)
+    , colnames = feature_names
+  )
   bst <- lgb.train(
     data = dtrain
     , nrounds = 5L
@@ -1814,7 +1815,6 @@ test_that("lgb.train() supports non-ASCII feature names", {
       , verbose = .LGB_VERBOSITY
       , num_threads = .LGB_MAX_THREADS
     )
-    , colnames = feature_names
   )
   expect_true(.is_Booster(bst))
   dumped_model <- jsonlite::fromJSON(bst$dump_model())
@@ -2838,7 +2838,11 @@ test_that(paste0("lgb.train() gives same result when interaction_constraints is
 
 test_that(paste0("lgb.train() gives same results when using interaction_constraints and specifying colnames"), {
   set.seed(1L)
-  dtrain <- lgb.Dataset(train$data, label = train$label, params = list(num_threads = .LGB_MAX_THREADS))
+  dtrain <- lgb.Dataset(
+    train$data
+    , label = train$label
+    , params = list(num_threads = .LGB_MAX_THREADS)
+  )
 
   params <- list(
     objective = "regression"
@@ -2854,6 +2858,7 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
   pred1 <- bst$predict(test$data)
 
   new_colnames <- paste0(colnames(train$data), "_x")
+  dtrain$set_colnames(new_colnames)
   params <- list(
     objective = "regression"
     , interaction_constraints = list(c(new_colnames[1L], new_colnames[2L]), new_colnames[3L])
@@ -2864,7 +2869,6 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
     data = dtrain
     , params = params
     , nrounds = 2L
-    , colnames = new_colnames
  )
   pred2 <- bst$predict(test$data)
diff --git a/examples/python-guide/advanced_example.py b/examples/python-guide/advanced_example.py
index 4f0263286237..601a04d01481 100644
--- a/examples/python-guide/advanced_example.py
+++ b/examples/python-guide/advanced_example.py
@@ -25,9 +25,14 @@
 
 num_train, num_feature = X_train.shape
 
+# generate feature names
+feature_name = [f"feature_{col}" for col in range(num_feature)]
+
 # create dataset for lightgbm
 # if you want to re-use data, remember to set free_raw_data=False
-lgb_train = lgb.Dataset(X_train, y_train, weight=W_train, free_raw_data=False)
+lgb_train = lgb.Dataset(
+    X_train, y_train, weight=W_train, feature_name=feature_name, categorical_feature=[21], free_raw_data=False
+)
 lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, weight=W_test, free_raw_data=False)
 
 # specify your configurations as a dict
@@ -43,9 +48,6 @@
     "verbose": 0,
 }
 
-# generate feature names
-feature_name = [f"feature_{col}" for col in range(num_feature)]
-
 print("Starting training...")
 # feature_name and categorical_feature
 gbm = lgb.train(
@@ -53,8 +55,6 @@
     lgb_train,
     num_boost_round=10,
     valid_sets=lgb_train,  # eval training data
-    feature_name=feature_name,
-    categorical_feature=[21],
 )
 
 print("Finished first 10 rounds...")
diff --git a/examples/python-guide/notebooks/interactive_plot_example.ipynb b/examples/python-guide/notebooks/interactive_plot_example.ipynb
index 2cab2ff43881..cc8efa2c187b 100644
--- a/examples/python-guide/notebooks/interactive_plot_example.ipynb
+++ b/examples/python-guide/notebooks/interactive_plot_example.ipynb
@@ -78,7 +78,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "lgb_train = lgb.Dataset(X_train, y_train)\n",
+    "lgb_train = lgb.Dataset(\n",
+    "    X_train,\n",
+    "    y_train,\n",
+    "    feature_name=[f\"f{i + 1}\" for i in range(X_train.shape[-1])],\n",
+    "    categorical_feature=[21],\n",
+    ")\n",
     "lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)"
    ]
   },
@@ -144,8 +149,6 @@
     "    lgb_train,\n",
     "    num_boost_round=100,\n",
     "    valid_sets=[lgb_train, lgb_test],\n",
-    "    feature_name=[f\"f{i + 1}\" for i in range(X_train.shape[-1])],\n",
-    "    categorical_feature=[21],\n",
     "    callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)],\n",
     ")"
    ]
diff --git a/examples/python-guide/plot_example.py b/examples/python-guide/plot_example.py
index efbb971d52a4..eaef1e91b466 100644
--- a/examples/python-guide/plot_example.py
+++ b/examples/python-guide/plot_example.py
@@ -22,7 +22,12 @@
 X_test = df_test.drop(0, axis=1)
 
 # create dataset for lightgbm
-lgb_train = lgb.Dataset(X_train, y_train)
+lgb_train = lgb.Dataset(
+    X_train,
+    y_train,
+    feature_name=[f"f{i + 1}" for i in range(X_train.shape[-1])],
+    categorical_feature=[21],
+)
 lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)
 
 # specify your configurations as a dict
@@ -37,8 +42,6 @@
     lgb_train,
     num_boost_round=100,
     valid_sets=[lgb_train, lgb_test],
-    feature_name=[f"f{i + 1}" for i in range(X_train.shape[-1])],
-    categorical_feature=[21],
     callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)],
 )
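
These examples pass feature_name and categorical_feature explicitly because their training data is a bare matrix with no usable column metadata. Worth noting for users migrating their own code (illustrative sketch below): with a pandas DataFrame, the default categorical_feature="auto" picks up 'category' dtype columns, and column names become feature names, so neither argument is needed:

import numpy as np
import pandas as pd
import lightgbm as lgb

rng = np.random.default_rng(4)
df = pd.DataFrame({
    "num0": rng.normal(size=100),
    "cat0": pd.Series(rng.integers(0, 3, size=100)).astype("category"),
})
y = rng.normal(size=100)

# column names become feature names; 'category' columns are treated as categorical
dtrain = lgb.Dataset(df, label=y)
bst = lgb.train({"objective": "regression", "verbosity": -1}, dtrain, num_boost_round=2)
assert bst.feature_name() == ["num0", "cat0"]
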
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index 4a4ab8b4fd13..74b211f4a426 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -2,6 +2,7 @@
 """Library with training routines of LightGBM."""
 import copy
 import json
+import warnings
 from collections import OrderedDict, defaultdict
 from operator import attrgetter
 from pathlib import Path
@@ -13,6 +14,7 @@
 from .basic import (
     Booster,
     Dataset,
+    LGBMDeprecationWarning,
     LightGBMError,
     _choose_param_value,
     _ConfigAliases,
@@ -51,6 +53,15 @@
 ]
 
 
+def _emit_dataset_kwarg_warning(calling_function: str, argname: str) -> None:
+    msg = (
+        f"Argument '{argname}' to {calling_function}() is deprecated and will be removed in "
+        f"a future release. Set '{argname}' when calling lightgbm.Dataset() instead. "
+        "See https://github.com/microsoft/LightGBM/issues/6435."
+    )
+    warnings.warn(msg, category=LGBMDeprecationWarning, stacklevel=2)
+
+
 def train(
     params: Dict[str, Any],
     train_set: Dataset,
@@ -103,9 +114,11 @@ def train(
     init_model : str, pathlib.Path, Booster or None, optional (default=None)
         Filename of LightGBM model or Booster instance used for continue training.
     feature_name : list of str, or 'auto', optional (default="auto")
+        **Deprecated.** Set ``feature_name`` on ``train_set`` instead.
         Feature names.
         If 'auto' and data is pandas DataFrame, data columns names are used.
     categorical_feature : list of str or int, or 'auto', optional (default="auto")
+        **Deprecated.** Set ``categorical_feature`` on ``train_set`` instead.
         Categorical features.
         If list of int, interpreted as indices.
         If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
@@ -166,6 +179,13 @@ def train(
                 f"Item {i} has type '{type(valid_item).__name__}'."
             )
 
+    # raise deprecation warnings if necessary
+    # ref: https://github.com/microsoft/LightGBM/issues/6435
+    if categorical_feature != "auto":
+        _emit_dataset_kwarg_warning("train", "categorical_feature")
+    if feature_name != "auto":
+        _emit_dataset_kwarg_warning("train", "feature_name")
+
     # create predictor first
     params = copy.deepcopy(params)
     params = _choose_param_value(
@@ -625,9 +645,11 @@ def cv(
     init_model : str, pathlib.Path, Booster or None, optional (default=None)
         Filename of LightGBM model or Booster instance used for continue training.
     feature_name : list of str, or 'auto', optional (default="auto")
+        **Deprecated.** Set ``feature_name`` on ``train_set`` instead.
         Feature names.
        If 'auto' and data is pandas DataFrame, data columns names are used.
     categorical_feature : list of str or int, or 'auto', optional (default="auto")
+        **Deprecated.** Set ``categorical_feature`` on ``train_set`` instead.
         Categorical features.
         If list of int, interpreted as indices.
         If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
@@ -693,6 +715,13 @@ def cv(
     if num_boost_round <= 0:
         raise ValueError(f"num_boost_round must be greater than 0. Got {num_boost_round}.")
 
+    # raise deprecation warnings if necessary
+    # ref: https://github.com/microsoft/LightGBM/issues/6435
+    if categorical_feature != "auto":
+        _emit_dataset_kwarg_warning("cv", "categorical_feature")
+    if feature_name != "auto":
+        _emit_dataset_kwarg_warning("cv", "feature_name")
+
     params = copy.deepcopy(params)
     params = _choose_param_value(
         main_param_name="objective",
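
Since the deprecated keywords still work for now, callers can confirm the new warning fires before migrating. A minimal sketch using the standard warnings module; the data and feature names are illustrative:

import warnings

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(5)
dtrain = lgb.Dataset(rng.normal(size=(50, 2)), label=rng.normal(size=50))

# the deprecated keyword path still trains, but now emits an
# LGBMDeprecationWarning that points at lightgbm.Dataset()
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    lgb.train({"objective": "regression", "verbosity": -1}, dtrain,
              num_boost_round=1, feature_name=["a", "b"])
assert any("deprecated" in str(w.message) for w in caught)
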
diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 0b4c99933652..1ea7b47c5462 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -862,6 +862,7 @@ def fit(
             group=group,
             init_score=init_score,
             categorical_feature=categorical_feature,
+            feature_name=feature_name,
             params=params,
         )
 
@@ -928,7 +929,6 @@ def _get_meta_data(collection, name, i):
             valid_names=eval_names,
             feval=eval_metrics_callable,  # type: ignore[arg-type]
             init_model=init_model,
-            feature_name=feature_name,
             callbacks=callbacks,
         )
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 29210b94b4a1..7b1009632626 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1421,13 +1421,14 @@ def test_cvbooster_picklable(serializer):
 def test_feature_name():
     X_train, y_train = make_synthetic_regression()
     params = {"verbose": -1}
-    lgb_train = lgb.Dataset(X_train, y_train)
     feature_names = [f"f_{i}" for i in range(X_train.shape[-1])]
-    gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names)
+    lgb_train = lgb.Dataset(X_train, y_train, feature_name=feature_names)
+    gbm = lgb.train(params, lgb_train, num_boost_round=5)
     assert feature_names == gbm.feature_name()
     # test feature_names with whitespaces
     feature_names_with_space = [f"f {i}" for i in range(X_train.shape[-1])]
-    gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names_with_space)
+    lgb_train.set_feature_name(feature_names_with_space)
+    gbm = lgb.train(params, lgb_train, num_boost_round=5)
     assert feature_names == gbm.feature_name()
 
 
@@ -1437,9 +1438,9 @@ def test_feature_name_with_non_ascii():
     # This has non-ascii strings.
     feature_names = ["F_零", "F_一", "F_二", "F_三"]
     params = {"verbose": -1}
-    lgb_train = lgb.Dataset(X_train, y_train)
+    lgb_train = lgb.Dataset(X_train, y_train, feature_name=feature_names)
 
-    gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names)
+    gbm = lgb.train(params, lgb_train, num_boost_round=5)
     assert feature_names == gbm.feature_name()
     gbm.save_model("lgb.model")
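
The updated test relies on Dataset.set_feature_name() to rename features on an existing Dataset, replacing the old train(feature_name=...) route. A minimal sketch of that pattern, with illustrative names and synthetic data:

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(2)
dtrain = lgb.Dataset(rng.normal(size=(50, 3)), label=rng.normal(size=50),
                     feature_name=["a", "b", "c"])

# names can be replaced on the Dataset itself instead of at train() time
dtrain.set_feature_name(["x", "y", "z"])
bst = lgb.train({"objective": "regression", "verbosity": -1}, dtrain, num_boost_round=2)
assert bst.feature_name() == ["x", "y", "z"]
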
feature_names = ["F_零", "F_一", "F_二", "F_三"] params = {"verbose": -1} - lgb_train = lgb.Dataset(X_train, y_train) + lgb_train = lgb.Dataset(X_train, y_train, feature_name=feature_names) - gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names) + gbm = lgb.train(params, lgb_train, num_boost_round=5) assert feature_names == gbm.feature_name() gbm.save_model("lgb.model") diff --git a/tests/python_package_test/test_utilities.py b/tests/python_package_test/test_utilities.py index 08208ccfbf4a..3359d060e109 100644 --- a/tests/python_package_test/test_utilities.py +++ b/tests/python_package_test/test_utilities.py @@ -25,8 +25,8 @@ def dummy_metric(_, __): X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32) y = np.array([0, 1, 1, 0]) - lgb_train = lgb.Dataset(X, y) - lgb_valid = lgb.Dataset(X, y) # different object for early-stopping + lgb_train = lgb.Dataset(X, y, categorical_feature=[1]) + lgb_valid = lgb.Dataset(X, y, categorical_feature=[1]) # different object for early-stopping eval_records = {} callbacks = [lgb.record_evaluation(eval_records), lgb.log_evaluation(2), lgb.early_stopping(10)] @@ -36,7 +36,6 @@ def dummy_metric(_, __): num_boost_round=10, feval=dummy_metric, valid_sets=[lgb_valid], - categorical_feature=[1], callbacks=callbacks, ) @@ -151,12 +150,11 @@ def custom_warning(self, msg: str) -> None: logged_messages = [] X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32) y = np.array([0, 1, 1, 0]) - lgb_data = lgb.Dataset(X, y) + lgb_data = lgb.Dataset(X, y, categorical_feature=[1]) lgb.train( {"objective": "binary", "metric": "auc"}, lgb_data, num_boost_round=10, valid_sets=[lgb_data], - categorical_feature=[1], ) assert logged_messages, "custom logger was not called"