diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R
index 13685e7f2204..c22d0ea848bb 100644
--- a/R-package/R/lgb.cv.R
+++ b/R-package/R/lgb.cv.R
@@ -25,8 +25,8 @@ CVBooster <- R6::R6Class(
 #' @description Cross validation logic used by LightGBM
 #' @inheritParams lgb_shared_params
 #' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
-#' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
-#' @param weight vector of response values. If not NULL, will set to dataset
+#' @param label Deprecated. See "Deprecated Arguments" section below.
+#' @param weight Deprecated. See "Deprecated Arguments" section below.
 #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
 #' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
 #'               This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
@@ -36,10 +36,8 @@ CVBooster <- R6::R6Class(
 #' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
 #'              (each element must be a vector of test fold's indices). When folds are supplied,
 #'              the \code{nfold} and \code{stratified} parameters are ignored.
-#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
-#' @param categorical_feature categorical features. This can either be a character vector of feature
-#'                            names or an integer vector with the indices of the features (e.g.
-#'                            \code{c(1L, 10L)} to say "the first and tenth columns").
+#' @param colnames Deprecated. See "Deprecated Arguments" section below.
+#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
 #' @param callbacks List of callback functions that are applied at each iteration.
 #' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the booster model
 #'                   into a predictor model which frees up memory and the original datasets
@@ -70,6 +68,13 @@ CVBooster <- R6::R6Class(
 #'   , nfold = 3L
 #' )
 #' }
+#'
+#' @section Deprecated Arguments:
+#'
+#' A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
+#' to argument \code{'data'}. It will also remove support for passing arguments
+#' \code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
+#'
 #' @importFrom data.table data.table setorderv
 #' @export
 lgb.cv <- function(params = list()
@@ -102,12 +107,32 @@ lgb.cv <- function(params = list()
 
   # If 'data' is not an lgb.Dataset, try to construct one using 'label'
   if (!.is_Dataset(x = data)) {
+    warning(paste0(
+      "Passing anything other than an lgb.Dataset object to lgb.cv() is deprecated. "
+      , "Either pass an lgb.Dataset object, or use lightgbm()."
+    ))
     if (is.null(label)) {
       stop("'label' must be provided for lgb.cv if 'data' is not an 'lgb.Dataset'")
     }
     data <- lgb.Dataset(data = data, label = label)
   }
 
+  # raise deprecation warnings if necessary
+  # ref: https://github.com/microsoft/LightGBM/issues/6435
+  args <- names(match.call())
+  if ("categorical_feature" %in% args) {
+    .emit_dataset_kwarg_warning(calling_function = "lgb.cv", argname = "categorical_feature")
+  }
+  if ("colnames" %in% args) {
+    .emit_dataset_kwarg_warning(calling_function = "lgb.cv", argname = "colnames")
+  }
+  if ("label" %in% args) {
+    .emit_dataset_kwarg_warning(calling_function = "lgb.cv", argname = "label")
+  }
+  if ("weight" %in% args) {
+    .emit_dataset_kwarg_warning(calling_function = "lgb.cv", argname = "weight")
+  }
+
   # set some parameters, resolving the way they were passed in with other parameters
   # in `params`.
   # this ensures that the model stored with Booster$save() correctly represents
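
The Python package deprecates the same pattern for cv() (see the engine.py changes below). For reference, a minimal sketch of the new-style call in Python; the synthetic data and parameter values are illustrative only:

import numpy as np
import lightgbm as lgb

# illustrative synthetic data
rng = np.random.default_rng(42)
X = rng.normal(size=(100, 4))
y = rng.normal(size=100)

# new style: label and weight live on the Dataset itself...
dtrain = lgb.Dataset(X, label=y, weight=np.ones(100))

# ...so cv() needs nothing beyond params, the Dataset, and the CV settings
cv_results = lgb.cv(
    params={"objective": "regression", "verbosity": -1},
    train_set=dtrain,
    num_boost_round=5,
    nfold=3,
)
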
diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R
index 8a299fb6b8ac..dafb4d83b66b 100644
--- a/R-package/R/lgb.train.R
+++ b/R-package/R/lgb.train.R
@@ -6,10 +6,8 @@
 #' @inheritParams lgb_shared_params
 #' @param valids a list of \code{lgb.Dataset} objects, used for validation
 #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
-#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
-#' @param categorical_feature categorical features. This can either be a character vector of feature
-#'                            names or an integer vector with the indices of the features (e.g.
-#'                            \code{c(1L, 10L)} to say "the first and tenth columns").
+#' @param colnames Deprecated. See "Deprecated Arguments" section below.
+#' @param categorical_feature Deprecated. See "Deprecated Arguments" section below.
 #' @param callbacks List of callback functions that are applied at each iteration.
 #' @param reset_data Boolean, setting it to TRUE (not the default value) will transform the
 #'                   booster model into a predictor model which frees up memory and the
@@ -43,6 +41,13 @@
 #'   , early_stopping_rounds = 3L
 #' )
 #' }
+#'
+#' @section Deprecated Arguments:
+#'
+#' A future release of \code{lightgbm} will remove support for passing arguments
+#' \code{'categorical_feature'} and \code{'colnames'}. Pass these arguments to
+#' \code{lgb.Dataset} instead.
+#'
 #' @export
 lgb.train <- function(params = list(),
                       data,
@@ -78,6 +83,16 @@ lgb.train <- function(params = list(),
     }
   }
 
+  # raise deprecation warnings if necessary
+  # ref: https://github.com/microsoft/LightGBM/issues/6435
+  args <- names(match.call())
+  if ("categorical_feature" %in% args) {
+    .emit_dataset_kwarg_warning(calling_function = "lgb.train", argname = "categorical_feature")
+  }
+  if ("colnames" %in% args) {
+    .emit_dataset_kwarg_warning(calling_function = "lgb.train", argname = "colnames")
+  }
+
   # set some parameters, resolving the way they were passed in with other parameters
   # in `params`.
   # this ensures that the model stored with Booster$save() correctly represents
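
On the Python side, the matching migration for train() moves feature_name and categorical_feature onto the Dataset. A minimal sketch, with illustrative names and synthetic data:

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(0)
X = rng.integers(0, 5, size=(100, 3)).astype(np.float64)
y = rng.normal(size=100)
names = ["f0", "f1", "f2"]

# deprecated: lgb.train(params, lgb.Dataset(X, y), feature_name=names, categorical_feature=[0])
# recommended: attach the feature metadata to the Dataset instead
dtrain = lgb.Dataset(X, label=y, feature_name=names, categorical_feature=[0])
bst = lgb.train({"objective": "regression", "verbosity": -1}, dtrain, num_boost_round=5)
assert bst.feature_name() == names
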
diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R
index f1a0090f950a..efa593ffe12f 100644
--- a/R-package/R/lightgbm.R
+++ b/R-package/R/lightgbm.R
@@ -144,6 +144,12 @@ NULL
 #'
 #' \emph{New in version 4.0.0}
 #'
+#' @param colnames Character vector of features. Only used if \code{data} is not an \code{\link{lgb.Dataset}}.
+#' @param categorical_feature categorical features. This can either be a character vector of feature
+#'                            names or an integer vector with the indices of the features (e.g.
+#'                            \code{c(1L, 10L)} to say "the first and tenth columns").
+#'                            Only used if \code{data} is not an \code{\link{lgb.Dataset}}.
+#'
 #' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example
 #'     \itemize{
 #'        \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
@@ -152,10 +158,6 @@ NULL
 #'            \code{binary}, \code{lambdarank}, \code{multiclass}}
 #'        \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
 #'        \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
-#'        \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
-#'        \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
-#'            names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
-#'            say "the first and tenth columns").}
 #'        \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
 #'            into a predictor model which frees up memory and the original datasets}
 #'     }
@@ -176,6 +178,8 @@ lightgbm <- function(data,
                      objective = "auto",
                      init_score = NULL,
                      num_threads = NULL,
+                     colnames = NULL,
+                     categorical_feature = NULL,
                      ...) {
 
   # validate inputs early to avoid unnecessary computation
@@ -221,7 +225,14 @@ lightgbm <- function(data,
 
   # Check whether data is lgb.Dataset, if not then create lgb.Dataset manually
   if (!.is_Dataset(x = dtrain)) {
-    dtrain <- lgb.Dataset(data = data, label = label, weight = weights, init_score = init_score)
+    dtrain <- lgb.Dataset(
+      data = data
+      , label = label
+      , weight = weights
+      , init_score = init_score
+      , categorical_feature = categorical_feature
+      , colnames = colnames
+    )
   }
 
   train_args <- list(
diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index 1ac6f197ca77..646a306c97f6 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -260,3 +260,19 @@
     return(a == b)
   }
 }
+
+# ref: https://github.com/microsoft/LightGBM/issues/6435
+.emit_dataset_kwarg_warning <- function(calling_function, argname) {
+  msg <- sprintf(
+    paste0(
+      "Argument '%s' to %s() is deprecated and will be removed in a future release. "
+      , "Set '%s' with lgb.Dataset() instead. "
+      , "See https://github.com/microsoft/LightGBM/issues/6435."
+    )
+    , argname
+    , calling_function
+    , argname
+  )
+  warning(msg)
+  return(invisible(NULL))
+}
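
lightgbm() keeps accepting these arguments (while lgb.train() loses them) because it is the high-level entry point that constructs the Dataset internally. The Python package's equivalent path is the scikit-learn interface (see the sklearn.py hunk below), which behaves the same way. A minimal Python sketch of that path, with illustrative data and names:

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(1)
X = rng.integers(0, 4, size=(200, 5)).astype(np.float64)
y = rng.normal(size=200)
names = [f"c{i}" for i in range(5)]

# the scikit-learn interface forwards these into the Dataset it builds internally
reg = lgb.LGBMRegressor(n_estimators=5, verbosity=-1)
reg.fit(X, y, feature_name=names, categorical_feature=[0, 1])
assert reg.booster_.feature_name() == names
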
diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd
index 7ea2928c6166..cee059d494ca 100644
--- a/R-package/man/lgb.cv.Rd
+++ b/R-package/man/lgb.cv.Rd
@@ -41,9 +41,9 @@ may allow you to pass other types of data like \code{matrix} and then separately
 
 \item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
 
-\item{label}{Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}}
+\item{label}{Deprecated. See "Deprecated Arguments" section below.}
 
-\item{weight}{vector of response values. If not NULL, will set to dataset}
+\item{weight}{Deprecated. See "Deprecated Arguments" section below.}
 
 \item{obj}{objective function, can be character or custom objective function.
 Examples include \code{regression}, \code{regression_l1}, \code{huber},
@@ -103,11 +103,9 @@ the \code{nfold} and \code{stratified} parameters are ignored.}
 \item{init_model}{path of model file or \code{lgb.Booster} object,
 will continue training from this model}
 
-\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
+\item{colnames}{Deprecated. See "Deprecated Arguments" section below.}
 
-\item{categorical_feature}{categorical features. This can either be a character vector of feature
-names or an integer vector with the indices of the features (e.g.
-\code{c(1L, 10L)} to say "the first and tenth columns").}
+\item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.}
 
 \item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
 training will stop if the evaluation of any metric on any validation set
@@ -133,6 +131,14 @@ a trained model \code{lgb.CVBooster}.
 \description{
 Cross validation logic used by LightGBM
 }
+\section{Deprecated Arguments}{
+
+
+A future release of \code{lightgbm} will require passing an \code{lgb.Dataset}
+to argument \code{'data'}. It will also remove support for passing arguments
+\code{'categorical_feature'}, \code{'colnames'}, \code{'label'}, and \code{'weight'}.
+}
+
 \section{Early Stopping}{
 
@@ -171,4 +177,5 @@ model <- lgb.cv(
   , nfold = 3L
 )
 }
+
 }
diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd
index 557c85b7f9dc..ebbfc206998e 100644
--- a/R-package/man/lgb.train.Rd
+++ b/R-package/man/lgb.train.Rd
@@ -82,11 +82,9 @@ printing of evaluation during training}
 \item{init_model}{path of model file or \code{lgb.Booster} object,
 will continue training from this model}
 
-\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
+\item{colnames}{Deprecated. See "Deprecated Arguments" section below.}
 
-\item{categorical_feature}{categorical features. This can either be a character vector of feature
-names or an integer vector with the indices of the features (e.g.
-\code{c(1L, 10L)} to say "the first and tenth columns").}
+\item{categorical_feature}{Deprecated. See "Deprecated Arguments" section below.}
 
 \item{early_stopping_rounds}{int. Activates early stopping. When this parameter is non-null,
 training will stop if the evaluation of any metric on any validation set
@@ -111,6 +109,14 @@ Low-level R interface to train a LightGBM model. Unlike \code{\link{lightgbm}},
 this function is focused on performance (e.g. speed, memory efficiency). It is also
 less likely to have breaking API changes in new releases than \code{\link{lightgbm}}.
 }
+\section{Deprecated Arguments}{
+
+
+A future release of \code{lightgbm} will remove support for passing arguments
+\code{'categorical_feature'} and \code{'colnames'}. Pass these arguments to
+\code{lgb.Dataset} instead.
+}
+
 \section{Early Stopping}{
 
@@ -154,4 +160,5 @@ model <- lgb.train(
   , early_stopping_rounds = 3L
 )
 }
+
 }
diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd
index 09d7704605c1..90cb3166bf5c 100644
--- a/R-package/man/lightgbm.Rd
+++ b/R-package/man/lightgbm.Rd
@@ -19,6 +19,8 @@ lightgbm(
   objective = "auto",
   init_score = NULL,
   num_threads = NULL,
+  colnames = NULL,
+  categorical_feature = NULL,
   ...
 )
 }
@@ -96,6 +98,13 @@ set to the iteration number of the best iteration.}
 
 \emph{New in version 4.0.0}}
 
+\item{colnames}{Character vector of features. Only used if \code{data} is not an \code{\link{lgb.Dataset}}.}
+
+\item{categorical_feature}{categorical features. This can either be a character vector of feature
+names or an integer vector with the indices of the features (e.g.
+\code{c(1L, 10L)} to say "the first and tenth columns").
+Only used if \code{data} is not an \code{\link{lgb.Dataset}}.}
+
 \item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example
 \itemize{
    \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation}
@@ -104,10 +113,6 @@ set to the iteration number of the best iteration.}
        \code{binary}, \code{lambdarank}, \code{multiclass}}
    \item{\code{eval}: evaluation function, can be (a list of) character or custom eval function}
    \item{\code{record}: Boolean, TRUE will record iteration message to \code{booster$record_evals}}
-   \item{\code{colnames}: feature names, if not null, will use this to overwrite the names in dataset}
-   \item{\code{categorical_feature}: categorical features. This can either be a character vector of feature
-       names or an integer vector with the indices of the features (e.g. \code{c(1L, 10L)} to
-       say "the first and tenth columns").}
    \item{\code{reset_data}: Boolean, setting it to TRUE (not the default value) will transform the booster model
        into a predictor model which frees up memory and the original datasets}
 }}
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 75abd26dd152..74c46dcef141 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -433,7 +433,7 @@ test_that("lgb.cv() rejects negative or 0 value passed to nrounds", {
   }
 })
 
-test_that("lgb.cv() throws an informative error is 'data' is not an lgb.Dataset and labels are not given", {
+test_that("lgb.cv() throws an informative error if 'data' is not an lgb.Dataset and labels are not given", {
   bad_values <- list(
     4L
     , "hello"
@@ -1788,11 +1788,6 @@ test_that("lgb.train() works with early stopping for regression with a metric th
 
 test_that("lgb.train() supports non-ASCII feature names", {
-  dtrain <- lgb.Dataset(
-    data = matrix(rnorm(400L), ncol = 4L)
-    , label = rnorm(100L)
-    , params = list(num_threads = .LGB_MAX_THREADS)
-  )
   # content below is equivalent to
   #
   #   feature_names <- c("F_零", "F_一", "F_二", "F_三")
@@ -1805,6 +1800,12 @@ test_that("lgb.train() supports non-ASCII feature names", {
     , rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xba, 0x8c)))
     , rawToChar(as.raw(c(0x46, 0x5f, 0xe4, 0xb8, 0x89)))
   )
+  dtrain <- lgb.Dataset(
+    data = matrix(rnorm(400L), ncol = 4L)
+    , label = rnorm(100L)
+    , params = list(num_threads = .LGB_MAX_THREADS)
+    , colnames = feature_names
+  )
   bst <- lgb.train(
     data = dtrain
     , nrounds = 5L
@@ -1814,7 +1815,6 @@ test_that("lgb.train() supports non-ASCII feature names", {
       , verbose = .LGB_VERBOSITY
       , num_threads = .LGB_MAX_THREADS
     )
-    , colnames = feature_names
   )
   expect_true(.is_Booster(bst))
   dumped_model <- jsonlite::fromJSON(bst$dump_model())
@@ -2838,7 +2838,11 @@ test_that(paste0("lgb.train() gives same result when interaction_constraints is
 
 test_that(paste0("lgb.train() gives same results when using interaction_constraints and specifying colnames"), {
   set.seed(1L)
-  dtrain <- lgb.Dataset(train$data, label = train$label, params = list(num_threads = .LGB_MAX_THREADS))
+  dtrain <- lgb.Dataset(
+    train$data
+    , label = train$label
+    , params = list(num_threads = .LGB_MAX_THREADS)
+  )
 
   params <- list(
     objective = "regression"
@@ -2854,6 +2858,7 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
   pred1 <- bst$predict(test$data)
 
   new_colnames <- paste0(colnames(train$data), "_x")
+  dtrain$set_colnames(new_colnames)
   params <- list(
     objective = "regression"
     , interaction_constraints = list(c(new_colnames[1L], new_colnames[2L]), new_colnames[3L])
@@ -2864,7 +2869,6 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai
     data = dtrain
     , params = params
     , nrounds = 2L
-    , colnames = new_colnames
  )
   pred2 <- bst$predict(test$data)
diff --git a/examples/python-guide/advanced_example.py b/examples/python-guide/advanced_example.py
index 4f0263286237..601a04d01481 100644
--- a/examples/python-guide/advanced_example.py
+++ b/examples/python-guide/advanced_example.py
@@ -25,9 +25,14 @@
 
 num_train, num_feature = X_train.shape
 
+# generate feature names
+feature_name = [f"feature_{col}" for col in range(num_feature)]
+
 # create dataset for lightgbm
 # if you want to re-use data, remember to set free_raw_data=False
-lgb_train = lgb.Dataset(X_train, y_train, weight=W_train, free_raw_data=False)
+lgb_train = lgb.Dataset(
+    X_train, y_train, weight=W_train, feature_name=feature_name, categorical_feature=[21], free_raw_data=False
+)
 lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, weight=W_test, free_raw_data=False)
 
 # specify your configurations as a dict
@@ -43,9 +48,6 @@
     "verbose": 0,
 }
 
-# generate feature names
-feature_name = [f"feature_{col}" for col in range(num_feature)]
-
 print("Starting training...")
 # feature_name and categorical_feature
 gbm = lgb.train(
@@ -53,8 +55,6 @@
     lgb_train,
     num_boost_round=10,
     valid_sets=lgb_train,  # eval training data
-    feature_name=feature_name,
-    categorical_feature=[21],
 )
 
 print("Finished first 10 rounds...")
diff --git a/examples/python-guide/notebooks/interactive_plot_example.ipynb b/examples/python-guide/notebooks/interactive_plot_example.ipynb
index 2cab2ff43881..cc8efa2c187b 100644
--- a/examples/python-guide/notebooks/interactive_plot_example.ipynb
+++ b/examples/python-guide/notebooks/interactive_plot_example.ipynb
@@ -78,7 +78,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "lgb_train = lgb.Dataset(X_train, y_train)\n",
+    "lgb_train = lgb.Dataset(\n",
+    "    X_train,\n",
+    "    y_train,\n",
+    "    feature_name=[f\"f{i + 1}\" for i in range(X_train.shape[-1])],\n",
+    "    categorical_feature=[21],\n",
+    ")\n",
     "lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)"
    ]
   },
@@ -144,8 +149,6 @@
     "    lgb_train,\n",
     "    num_boost_round=100,\n",
     "    valid_sets=[lgb_train, lgb_test],\n",
-    "    feature_name=[f\"f{i + 1}\" for i in range(X_train.shape[-1])],\n",
-    "    categorical_feature=[21],\n",
     "    callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)],\n",
     ")"
    ]
diff --git a/examples/python-guide/plot_example.py b/examples/python-guide/plot_example.py
index efbb971d52a4..eaef1e91b466 100644
--- a/examples/python-guide/plot_example.py
+++ b/examples/python-guide/plot_example.py
@@ -22,7 +22,12 @@
 X_test = df_test.drop(0, axis=1)
 
 # create dataset for lightgbm
-lgb_train = lgb.Dataset(X_train, y_train)
+lgb_train = lgb.Dataset(
+    X_train,
+    y_train,
+    feature_name=[f"f{i + 1}" for i in range(X_train.shape[-1])],
+    categorical_feature=[21],
+)
 lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)
 
 # specify your configurations as a dict
@@ -37,8 +42,6 @@
     lgb_train,
     num_boost_round=100,
     valid_sets=[lgb_train, lgb_test],
-    feature_name=[f"f{i + 1}" for i in range(X_train.shape[-1])],
-    categorical_feature=[21],
     callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)],
 )
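
These examples pass feature_name and categorical_feature explicitly because their training data is a bare matrix with no usable column metadata. Worth noting for users migrating their own code (illustrative sketch below): with a pandas DataFrame, the default categorical_feature="auto" picks up 'category' dtype columns, and column names become feature names, so neither argument is needed:

import numpy as np
import pandas as pd
import lightgbm as lgb

rng = np.random.default_rng(4)
df = pd.DataFrame({
    "num0": rng.normal(size=100),
    "cat0": pd.Series(rng.integers(0, 3, size=100)).astype("category"),
})
y = rng.normal(size=100)

# column names become feature names; 'category' columns are treated as categorical
dtrain = lgb.Dataset(df, label=y)
bst = lgb.train({"objective": "regression", "verbosity": -1}, dtrain, num_boost_round=2)
assert bst.feature_name() == ["num0", "cat0"]
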
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index 4a4ab8b4fd13..74b211f4a426 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -2,6 +2,7 @@
 """Library with training routines of LightGBM."""
 import copy
 import json
+import warnings
 from collections import OrderedDict, defaultdict
 from operator import attrgetter
 from pathlib import Path
@@ -13,6 +14,7 @@
 from .basic import (
     Booster,
     Dataset,
+    LGBMDeprecationWarning,
     LightGBMError,
     _choose_param_value,
     _ConfigAliases,
@@ -51,6 +53,15 @@
 ]
 
 
+def _emit_dataset_kwarg_warning(calling_function: str, argname: str) -> None:
+    msg = (
+        f"Argument '{argname}' to {calling_function}() is deprecated and will be removed in "
+        f"a future release. Set '{argname}' when calling lightgbm.Dataset() instead. "
+        "See https://github.com/microsoft/LightGBM/issues/6435."
+    )
+    warnings.warn(msg, category=LGBMDeprecationWarning, stacklevel=2)
+
+
 def train(
     params: Dict[str, Any],
     train_set: Dataset,
@@ -103,9 +114,11 @@ def train(
     init_model : str, pathlib.Path, Booster or None, optional (default=None)
         Filename of LightGBM model or Booster instance used for continue training.
     feature_name : list of str, or 'auto', optional (default="auto")
+        **Deprecated.** Set ``feature_name`` on ``train_set`` instead.
         Feature names.
         If 'auto' and data is pandas DataFrame, data columns names are used.
     categorical_feature : list of str or int, or 'auto', optional (default="auto")
+        **Deprecated.** Set ``categorical_feature`` on ``train_set`` instead.
         Categorical features.
         If list of int, interpreted as indices.
         If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
@@ -166,6 +179,13 @@ def train(
                 f"Item {i} has type '{type(valid_item).__name__}'."
             )
 
+    # raise deprecation warnings if necessary
+    # ref: https://github.com/microsoft/LightGBM/issues/6435
+    if categorical_feature != "auto":
+        _emit_dataset_kwarg_warning("train", "categorical_feature")
+    if feature_name != "auto":
+        _emit_dataset_kwarg_warning("train", "feature_name")
+
     # create predictor first
     params = copy.deepcopy(params)
     params = _choose_param_value(
@@ -625,9 +645,11 @@ def cv(
     init_model : str, pathlib.Path, Booster or None, optional (default=None)
         Filename of LightGBM model or Booster instance used for continue training.
     feature_name : list of str, or 'auto', optional (default="auto")
+        **Deprecated.** Set ``feature_name`` on ``train_set`` instead.
         Feature names.
        If 'auto' and data is pandas DataFrame, data columns names are used.
     categorical_feature : list of str or int, or 'auto', optional (default="auto")
+        **Deprecated.** Set ``categorical_feature`` on ``train_set`` instead.
         Categorical features.
         If list of int, interpreted as indices.
         If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
@@ -693,6 +715,13 @@ def cv(
     if num_boost_round <= 0:
         raise ValueError(f"num_boost_round must be greater than 0. Got {num_boost_round}.")
 
+    # raise deprecation warnings if necessary
+    # ref: https://github.com/microsoft/LightGBM/issues/6435
+    if categorical_feature != "auto":
+        _emit_dataset_kwarg_warning("cv", "categorical_feature")
+    if feature_name != "auto":
+        _emit_dataset_kwarg_warning("cv", "feature_name")
+
     params = copy.deepcopy(params)
     params = _choose_param_value(
         main_param_name="objective",
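
Since the deprecated keywords still work for now, callers can confirm the new warning fires before migrating. A minimal sketch using the standard warnings module; the data and feature names are illustrative:

import warnings

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(5)
dtrain = lgb.Dataset(rng.normal(size=(50, 2)), label=rng.normal(size=50))

# the deprecated keyword path still trains, but now emits an
# LGBMDeprecationWarning that points at lightgbm.Dataset()
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    lgb.train({"objective": "regression", "verbosity": -1}, dtrain,
              num_boost_round=1, feature_name=["a", "b"])
assert any("deprecated" in str(w.message) for w in caught)
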
diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 0b4c99933652..1ea7b47c5462 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -862,6 +862,7 @@ def fit(
             group=group,
             init_score=init_score,
             categorical_feature=categorical_feature,
+            feature_name=feature_name,
             params=params,
         )
 
@@ -928,7 +929,6 @@ def _get_meta_data(collection, name, i):
             valid_names=eval_names,
             feval=eval_metrics_callable,  # type: ignore[arg-type]
             init_model=init_model,
-            feature_name=feature_name,
             callbacks=callbacks,
         )
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 29210b94b4a1..7b1009632626 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1421,13 +1421,14 @@ def test_cvbooster_picklable(serializer):
 def test_feature_name():
     X_train, y_train = make_synthetic_regression()
     params = {"verbose": -1}
-    lgb_train = lgb.Dataset(X_train, y_train)
     feature_names = [f"f_{i}" for i in range(X_train.shape[-1])]
-    gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names)
+    lgb_train = lgb.Dataset(X_train, y_train, feature_name=feature_names)
+    gbm = lgb.train(params, lgb_train, num_boost_round=5)
     assert feature_names == gbm.feature_name()
     # test feature_names with whitespaces
     feature_names_with_space = [f"f {i}" for i in range(X_train.shape[-1])]
-    gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names_with_space)
+    lgb_train.set_feature_name(feature_names_with_space)
+    gbm = lgb.train(params, lgb_train, num_boost_round=5)
     assert feature_names == gbm.feature_name()
 
 
@@ -1437,9 +1438,9 @@ def test_feature_name_with_non_ascii():
     # This has non-ascii strings.
     feature_names = ["F_零", "F_一", "F_二", "F_三"]
     params = {"verbose": -1}
-    lgb_train = lgb.Dataset(X_train, y_train)
+    lgb_train = lgb.Dataset(X_train, y_train, feature_name=feature_names)
 
-    gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names)
+    gbm = lgb.train(params, lgb_train, num_boost_round=5)
     assert feature_names == gbm.feature_name()
     gbm.save_model("lgb.model")
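
The updated test relies on Dataset.set_feature_name() to rename features on an existing Dataset, replacing the old train(feature_name=...) route. A minimal sketch of that pattern, with illustrative names and synthetic data:

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(2)
dtrain = lgb.Dataset(rng.normal(size=(50, 3)), label=rng.normal(size=50),
                     feature_name=["a", "b", "c"])

# names can be replaced on the Dataset itself instead of at train() time
dtrain.set_feature_name(["x", "y", "z"])
bst = lgb.train({"objective": "regression", "verbosity": -1}, dtrain, num_boost_round=2)
assert bst.feature_name() == ["x", "y", "z"]
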
feature_names = ["F_零", "F_一", "F_二", "F_三"] params = {"verbose": -1} - lgb_train = lgb.Dataset(X_train, y_train) + lgb_train = lgb.Dataset(X_train, y_train, feature_name=feature_names) - gbm = lgb.train(params, lgb_train, num_boost_round=5, feature_name=feature_names) + gbm = lgb.train(params, lgb_train, num_boost_round=5) assert feature_names == gbm.feature_name() gbm.save_model("lgb.model") diff --git a/tests/python_package_test/test_utilities.py b/tests/python_package_test/test_utilities.py index 08208ccfbf4a..3359d060e109 100644 --- a/tests/python_package_test/test_utilities.py +++ b/tests/python_package_test/test_utilities.py @@ -25,8 +25,8 @@ def dummy_metric(_, __): X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32) y = np.array([0, 1, 1, 0]) - lgb_train = lgb.Dataset(X, y) - lgb_valid = lgb.Dataset(X, y) # different object for early-stopping + lgb_train = lgb.Dataset(X, y, categorical_feature=[1]) + lgb_valid = lgb.Dataset(X, y, categorical_feature=[1]) # different object for early-stopping eval_records = {} callbacks = [lgb.record_evaluation(eval_records), lgb.log_evaluation(2), lgb.early_stopping(10)] @@ -36,7 +36,6 @@ def dummy_metric(_, __): num_boost_round=10, feval=dummy_metric, valid_sets=[lgb_valid], - categorical_feature=[1], callbacks=callbacks, ) @@ -151,12 +150,11 @@ def custom_warning(self, msg: str) -> None: logged_messages = [] X = np.array([[1, 2, 3], [1, 2, 4], [1, 2, 4], [1, 2, 3]], dtype=np.float32) y = np.array([0, 1, 1, 0]) - lgb_data = lgb.Dataset(X, y) + lgb_data = lgb.Dataset(X, y, categorical_feature=[1]) lgb.train( {"objective": "binary", "metric": "auc"}, lgb_data, num_boost_round=10, valid_sets=[lgb_data], - categorical_feature=[1], ) assert logged_messages, "custom logger was not called"