add param aliases from scikit-learn (#4637)
StrikerRUS authored Oct 5, 2021
1 parent 2543c8e commit e95d5ab
Showing 5 changed files with 41 additions and 21 deletions.
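All five files below register the same scikit-learn-style spellings as aliases for existing LightGBM parameters. As a rough sketch of the net effect (plain Python written for this summary, not the package's actual resolution code, which lives in `Config::alias_table()` in `src/io/config_auto.cpp`), the new alias-to-canonical mapping is:

```python
# New aliases introduced by this commit, keyed by the scikit-learn spelling.
SKLEARN_ALIASES = {
    "loss": "objective",
    "max_iter": "num_iterations",
    "max_leaf_nodes": "num_leaves",
    "min_samples_leaf": "min_data_in_leaf",
    "l1_regularization": "lambda_l1",
    "l2_regularization": "lambda_l2",
    "monotonic_cst": "monotone_constraints",
    "max_bins": "max_bin",
    "categorical_features": "categorical_feature",
}

def canonicalize(params):
    """Rewrite alias keys in a params dict to their canonical names."""
    return {SKLEARN_ALIASES.get(key, key): value for key, value in params.items()}
```

For example, `canonicalize({"max_iter": 50, "max_bins": 63})` returns `{"num_iterations": 50, "max_bin": 63}`.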
R-package/R/aliases.R (7 changes: 6 additions & 1 deletion)
@@ -18,6 +18,7 @@
 , "cat_feature"
 , "categorical_column"
 , "cat_column"
+, "categorical_features"
 )
 , "data_random_seed" = c(
 "data_random_seed"
@@ -61,7 +62,10 @@
 "linear_tree"
 , "linear_trees"
 )
-, "max_bin" = "max_bin"
+, "max_bin" = c(
+"max_bin"
+, "max_bins"
+)
 , "max_bin_by_feature" = "max_bin_by_feature"
 , "min_data_in_bin" = "min_data_in_bin"
 , "pre_partition" = c(
@@ -111,6 +115,7 @@
 , "num_rounds"
 , "num_boost_round"
 , "n_estimators"
+, "max_iter"
 )
 )
 return(c(learning_params, .DATASET_PARAMETERS()))
docs/Parameters.rst (18 changes: 9 additions & 9 deletions)
@@ -73,7 +73,7 @@ Core Parameters
 
 - **Note**: can be used only in CLI version; for language-specific packages you can use the correspondent functions
 
-- ``objective`` :raw-html:`<a id="objective" title="Permalink to this parameter" href="#objective">&#x1F517;&#xFE0E;</a>`, default = ``regression``, type = enum, options: ``regression``, ``regression_l1``, ``huber``, ``fair``, ``poisson``, ``quantile``, ``mape``, ``gamma``, ``tweedie``, ``binary``, ``multiclass``, ``multiclassova``, ``cross_entropy``, ``cross_entropy_lambda``, ``lambdarank``, ``rank_xendcg``, aliases: ``objective_type``, ``app``, ``application``
+- ``objective`` :raw-html:`<a id="objective" title="Permalink to this parameter" href="#objective">&#x1F517;&#xFE0E;</a>`, default = ``regression``, type = enum, options: ``regression``, ``regression_l1``, ``huber``, ``fair``, ``poisson``, ``quantile``, ``mape``, ``gamma``, ``tweedie``, ``binary``, ``multiclass``, ``multiclassova``, ``cross_entropy``, ``cross_entropy_lambda``, ``lambdarank``, ``rank_xendcg``, aliases: ``objective_type``, ``app``, ``application``, ``loss``
 
 - regression application
 
@@ -153,7 +153,7 @@ Core Parameters
 
 - **Note**: can be used only in CLI version
 
-- ``num_iterations`` :raw-html:`<a id="num_iterations" title="Permalink to this parameter" href="#num_iterations">&#x1F517;&#xFE0E;</a>`, default = ``100``, type = int, aliases: ``num_iteration``, ``n_iter``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``, constraints: ``num_iterations >= 0``
+- ``num_iterations`` :raw-html:`<a id="num_iterations" title="Permalink to this parameter" href="#num_iterations">&#x1F517;&#xFE0E;</a>`, default = ``100``, type = int, aliases: ``num_iteration``, ``n_iter``, ``num_tree``, ``num_trees``, ``num_round``, ``num_rounds``, ``num_boost_round``, ``n_estimators``, ``max_iter``, constraints: ``num_iterations >= 0``
 
 - number of boosting iterations
 
@@ -165,7 +165,7 @@ Core Parameters
 
 - in ``dart``, it also affects on normalization weights of dropped trees
 
-- ``num_leaves`` :raw-html:`<a id="num_leaves" title="Permalink to this parameter" href="#num_leaves">&#x1F517;&#xFE0E;</a>`, default = ``31``, type = int, aliases: ``num_leaf``, ``max_leaves``, ``max_leaf``, constraints: ``1 < num_leaves <= 131072``
+- ``num_leaves`` :raw-html:`<a id="num_leaves" title="Permalink to this parameter" href="#num_leaves">&#x1F517;&#xFE0E;</a>`, default = ``31``, type = int, aliases: ``num_leaf``, ``max_leaves``, ``max_leaf``, ``max_leaf_nodes``, constraints: ``1 < num_leaves <= 131072``
 
 - max number of leaves in one tree
 
@@ -282,7 +282,7 @@ Learning Control Parameters
 
 - ``<= 0`` means no limit
 
-- ``min_data_in_leaf`` :raw-html:`<a id="min_data_in_leaf" title="Permalink to this parameter" href="#min_data_in_leaf">&#x1F517;&#xFE0E;</a>`, default = ``20``, type = int, aliases: ``min_data_per_leaf``, ``min_data``, ``min_child_samples``, constraints: ``min_data_in_leaf >= 0``
+- ``min_data_in_leaf`` :raw-html:`<a id="min_data_in_leaf" title="Permalink to this parameter" href="#min_data_in_leaf">&#x1F517;&#xFE0E;</a>`, default = ``20``, type = int, aliases: ``min_data_per_leaf``, ``min_data``, ``min_child_samples``, ``min_samples_leaf``, constraints: ``min_data_in_leaf >= 0``
 
 - minimal number of data in one leaf. Can be used to deal with over-fitting
 
@@ -402,11 +402,11 @@ Learning Control Parameters
 
 - the final max output of leaves is ``learning_rate * max_delta_step``
 
-- ``lambda_l1`` :raw-html:`<a id="lambda_l1" title="Permalink to this parameter" href="#lambda_l1">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``reg_alpha``, constraints: ``lambda_l1 >= 0.0``
+- ``lambda_l1`` :raw-html:`<a id="lambda_l1" title="Permalink to this parameter" href="#lambda_l1">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``reg_alpha``, ``l1_regularization``, constraints: ``lambda_l1 >= 0.0``
 
 - L1 regularization
 
-- ``lambda_l2`` :raw-html:`<a id="lambda_l2" title="Permalink to this parameter" href="#lambda_l2">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``reg_lambda``, ``lambda``, constraints: ``lambda_l2 >= 0.0``
+- ``lambda_l2`` :raw-html:`<a id="lambda_l2" title="Permalink to this parameter" href="#lambda_l2">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``reg_lambda``, ``lambda``, ``l2_regularization``, constraints: ``lambda_l2 >= 0.0``
 
 - L2 regularization
 
@@ -504,7 +504,7 @@ Learning Control Parameters
 
 - set this to larger value for more accurate result, but it will slow down the training speed
 
-- ``monotone_constraints`` :raw-html:`<a id="monotone_constraints" title="Permalink to this parameter" href="#monotone_constraints">&#x1F517;&#xFE0E;</a>`, default = ``None``, type = multi-int, aliases: ``mc``, ``monotone_constraint``
+- ``monotone_constraints`` :raw-html:`<a id="monotone_constraints" title="Permalink to this parameter" href="#monotone_constraints">&#x1F517;&#xFE0E;</a>`, default = ``None``, type = multi-int, aliases: ``mc``, ``monotone_constraint``, ``monotonic_cst``
 
 - used for constraints of monotonic features
 
@@ -672,7 +672,7 @@ Dataset Parameters
 
 - **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
 
-- ``max_bin`` :raw-html:`<a id="max_bin" title="Permalink to this parameter" href="#max_bin">&#x1F517;&#xFE0E;</a>`, default = ``255``, type = int, constraints: ``max_bin > 1``
+- ``max_bin`` :raw-html:`<a id="max_bin" title="Permalink to this parameter" href="#max_bin">&#x1F517;&#xFE0E;</a>`, default = ``255``, type = int, aliases: ``max_bins``, constraints: ``max_bin > 1``
 
 - max number of bins that feature values will be bucketed in
 
@@ -806,7 +806,7 @@ Dataset Parameters
 
 - **Note**: despite the fact that specified columns will be completely ignored during the training, they still should have a valid format allowing LightGBM to load file successfully
 
-- ``categorical_feature`` :raw-html:`<a id="categorical_feature" title="Permalink to this parameter" href="#categorical_feature">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = multi-int or string, aliases: ``cat_feature``, ``categorical_column``, ``cat_column``
+- ``categorical_feature`` :raw-html:`<a id="categorical_feature" title="Permalink to this parameter" href="#categorical_feature">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = multi-int or string, aliases: ``cat_feature``, ``categorical_column``, ``cat_column``, ``categorical_features``
 
 - used to specify categorical features
 
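With the aliases documented above, a CLI config file (or a params dict) can use either spelling for each parameter. A hypothetical fragment with purely illustrative values, assuming LightGBM's usual `key = value` config syntax with `#` comments:

```
objective = binary           # "loss" would now be accepted as an alias for this key
max_iter = 100               # alias of num_iterations
max_leaf_nodes = 31          # alias of num_leaves
min_samples_leaf = 20        # alias of min_data_in_leaf
max_bins = 255               # alias of max_bin
l2_regularization = 0.1      # alias of lambda_l2
```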
include/LightGBM/config.h (17 changes: 9 additions & 8 deletions)
@@ -109,7 +109,7 @@ struct Config {
 // [doc-only]
 // type = enum
 // options = regression, regression_l1, huber, fair, poisson, quantile, mape, gamma, tweedie, binary, multiclass, multiclassova, cross_entropy, cross_entropy_lambda, lambdarank, rank_xendcg
-// alias = objective_type, app, application
+// alias = objective_type, app, application, loss
 // desc = regression application
 // descl2 = ``regression``, L2 loss, aliases: ``regression_l2``, ``l2``, ``mean_squared_error``, ``mse``, ``l2_root``, ``root_mean_squared_error``, ``rmse``
 // descl2 = ``regression_l1``, L1 loss, aliases: ``l1``, ``mean_absolute_error``, ``mae``
@@ -161,7 +161,7 @@ struct Config {
 // desc = **Note**: can be used only in CLI version
 std::vector<std::string> valid;
 
-// alias = num_iteration, n_iter, num_tree, num_trees, num_round, num_rounds, num_boost_round, n_estimators
+// alias = num_iteration, n_iter, num_tree, num_trees, num_round, num_rounds, num_boost_round, n_estimators, max_iter
 // check = >=0
 // desc = number of boosting iterations
 // desc = **Note**: internally, LightGBM constructs ``num_class * num_iterations`` trees for multi-class classification problems
@@ -174,7 +174,7 @@ struct Config {
 double learning_rate = 0.1;
 
 // default = 31
-// alias = num_leaf, max_leaves, max_leaf
+// alias = num_leaf, max_leaves, max_leaf, max_leaf_nodes
 // check = >1
 // check = <=131072
 // desc = max number of leaves in one tree
@@ -261,7 +261,7 @@ struct Config {
 // desc = ``<= 0`` means no limit
 int max_depth = -1;
 
-// alias = min_data_per_leaf, min_data, min_child_samples
+// alias = min_data_per_leaf, min_data, min_child_samples, min_samples_leaf
 // check = >=0
 // desc = minimal number of data in one leaf. Can be used to deal with over-fitting
 // desc = **Note**: this is an approximation based on the Hessian, so occasionally you may observe splits which produce leaf nodes that have less than this many observations
@@ -360,12 +360,12 @@ struct Config {
 // desc = the final max output of leaves is ``learning_rate * max_delta_step``
 double max_delta_step = 0.0;
 
-// alias = reg_alpha
+// alias = reg_alpha, l1_regularization
 // check = >=0.0
 // desc = L1 regularization
 double lambda_l1 = 0.0;
 
-// alias = reg_lambda, lambda
+// alias = reg_lambda, lambda, l2_regularization
 // check = >=0.0
 // desc = L2 regularization
 double lambda_l2 = 0.0;
@@ -453,7 +453,7 @@ struct Config {
 int top_k = 20;
 
 // type = multi-int
-// alias = mc, monotone_constraint
+// alias = mc, monotone_constraint, monotonic_cst
 // default = None
 // desc = used for constraints of monotonic features
 // desc = ``1`` means increasing, ``-1`` means decreasing, ``0`` means non-constraint
@@ -586,6 +586,7 @@ struct Config {
 // descl2 = **Note**: if you specify ``monotone_constraints``, constraints will be enforced when choosing the split points, but not when fitting the linear models on leaves
 bool linear_tree = false;
 
+// alias = max_bins
 // check = >1
 // desc = max number of bins that feature values will be bucketed in
 // desc = small number of bins may reduce training accuracy but may increase general power (deal with over-fitting)
@@ -691,7 +692,7 @@ struct Config {
 std::string ignore_column = "";
 
 // type = multi-int or string
-// alias = cat_feature, categorical_column, cat_column
+// alias = cat_feature, categorical_column, cat_column, categorical_features
 // desc = used to specify categorical features
 // desc = use number for index, e.g. ``categorical_feature=0,1,2`` means column\_0, column\_1 and column\_2 are categorical features
 // desc = add a prefix ``name:`` for column name, e.g. ``categorical_feature=name:c1,c2,c3`` means c1, c2 and c3 are categorical features
python-package/lightgbm/basic.py (11 changes: 8 additions & 3 deletions)
@@ -331,7 +331,8 @@ class _ConfigAliases:
 "categorical_feature": {"categorical_feature",
 "cat_feature",
 "categorical_column",
-"cat_column"},
+"cat_column",
+"categorical_features"},
 "data_random_seed": {"data_random_seed",
 "data_seed"},
 "early_stopping_round": {"early_stopping_round",
@@ -371,6 +372,8 @@ class _ConfigAliases:
 "machines": {"machines",
 "workers",
 "nodes"},
+"max_bin": {"max_bin",
+"max_bins"},
 "metric": {"metric",
 "metrics",
 "metric_types"},
@@ -384,7 +387,8 @@ class _ConfigAliases:
 "num_round",
 "num_rounds",
 "num_boost_round",
-"n_estimators"},
+"n_estimators",
+"max_iter"},
 "num_machines": {"num_machines",
 "num_machine"},
 "num_threads": {"num_threads",
@@ -395,7 +399,8 @@ class _ConfigAliases:
 "objective": {"objective",
 "objective_type",
 "app",
-"application"},
+"application",
+"loss"},
 "pre_partition": {"pre_partition",
 "is_pre_partition"},
 "tree_learner": {"tree_learner",
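Alias sets like the ones above let the Python package find a parameter's value no matter which spelling the user supplied. A minimal sketch of that lookup pattern (a hypothetical helper, not the actual `_ConfigAliases` API):

```python
# Hypothetical resolver over an alias set like those in _ConfigAliases:
# the canonical name wins; otherwise use whichever alias appears in params.
def resolve(params, canonical, aliases):
    if canonical in params:
        return params[canonical]
    for name in aliases:  # set order is arbitrary; assume one alias is passed
        if name in params:
            return params[name]
    return None

NUM_ITERATIONS_ALIASES = {"num_iteration", "n_iter", "num_tree", "num_trees",
                          "num_round", "num_rounds", "num_boost_round",
                          "n_estimators", "max_iter"}

# The new scikit-learn spelling resolves like any older alias:
assert resolve({"max_iter": 25}, "num_iterations", NUM_ITERATIONS_ALIASES) == 25
```

In this sketch the canonical key takes precedence when both it and an alias are present, so `resolve({"num_iterations": 10, "n_estimators": 99}, ...)` yields 10.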
src/io/config_auto.cpp (9 changes: 9 additions & 0 deletions)
@@ -14,6 +14,7 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
 {"objective_type", "objective"},
 {"app", "objective"},
 {"application", "objective"},
+{"loss", "objective"},
 {"boosting_type", "boosting"},
 {"boost", "boosting"},
 {"train", "data"},
@@ -34,11 +35,13 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
 {"num_rounds", "num_iterations"},
 {"num_boost_round", "num_iterations"},
 {"n_estimators", "num_iterations"},
+{"max_iter", "num_iterations"},
 {"shrinkage_rate", "learning_rate"},
 {"eta", "learning_rate"},
 {"num_leaf", "num_leaves"},
 {"max_leaves", "num_leaves"},
 {"max_leaf", "num_leaves"},
+{"max_leaf_nodes", "num_leaves"},
 {"tree", "tree_learner"},
 {"tree_type", "tree_learner"},
 {"tree_learner_type", "tree_learner"},
@@ -53,6 +56,7 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
 {"min_data_per_leaf", "min_data_in_leaf"},
 {"min_data", "min_data_in_leaf"},
 {"min_child_samples", "min_data_in_leaf"},
+{"min_samples_leaf", "min_data_in_leaf"},
 {"min_sum_hessian_per_leaf", "min_sum_hessian_in_leaf"},
 {"min_sum_hessian", "min_sum_hessian_in_leaf"},
 {"min_hessian", "min_sum_hessian_in_leaf"},
@@ -79,13 +83,16 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
 {"max_tree_output", "max_delta_step"},
 {"max_leaf_output", "max_delta_step"},
 {"reg_alpha", "lambda_l1"},
+{"l1_regularization", "lambda_l1"},
 {"reg_lambda", "lambda_l2"},
 {"lambda", "lambda_l2"},
+{"l2_regularization", "lambda_l2"},
 {"min_split_gain", "min_gain_to_split"},
 {"rate_drop", "drop_rate"},
 {"topk", "top_k"},
 {"mc", "monotone_constraints"},
 {"monotone_constraint", "monotone_constraints"},
+{"monotonic_cst", "monotone_constraints"},
 {"monotone_constraining_method", "monotone_constraints_method"},
 {"mc_method", "monotone_constraints_method"},
 {"monotone_splits_penalty", "monotone_penalty"},
@@ -106,6 +113,7 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
 {"model_out", "output_model"},
 {"save_period", "snapshot_freq"},
 {"linear_trees", "linear_tree"},
+{"max_bins", "max_bin"},
 {"subsample_for_bin", "bin_construct_sample_cnt"},
 {"data_seed", "data_random_seed"},
 {"is_sparse", "is_enable_sparse"},
@@ -129,6 +137,7 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
 {"cat_feature", "categorical_feature"},
 {"categorical_column", "categorical_feature"},
 {"cat_column", "categorical_feature"},
+{"categorical_features", "categorical_feature"},
 {"is_save_binary", "save_binary"},
 {"is_save_binary_file", "save_binary"},
 {"is_predict_raw_score", "predict_raw_score"},
