@@ -359,7 +359,7 @@ class CartModel(core.CoreModel):
359
359
split_axis: What structure of split to consider for numerical features. -
360
360
`AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
361
361
is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
362
- Sparse oblique splits (i.e. random splits one a small number of features)
362
+ Sparse oblique splits (i.e. random splits on a small number of features)
363
363
from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
364
364
`MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
365
365
"Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
@@ -1030,6 +1030,9 @@ class GradientBoostedTreesModel(core.CoreModel):
1030
1030
variable importance of the model at the end of the training using the
1031
1031
validation dataset. Enabling this feature can increase the training time
1032
1032
significantly. Default: False.
1033
+ cross_entropy_ndcg_truncation: Truncation of the cross-entropy NDCG loss
1034
+ (default 5). Only used with cross-entropy NDCG loss i.e.
1035
+ `loss="XE_NDCG_MART"`. Default: 5.
1033
1036
dart_dropout: Dropout rate applied when using the DART i.e. when
1034
1037
forest_extraction=DART. Default: None.
1035
1038
early_stopping: Early stopping detects the overfitting of the model and
@@ -1048,12 +1051,12 @@ class GradientBoostedTreesModel(core.CoreModel):
1048
1051
Default: 10.
1049
1052
early_stopping_num_trees_look_ahead: Rolling number of trees used to detect
1050
1053
validation loss increase and trigger early stopping. Default: 30.
1051
- focal_loss_alpha: EXPERIMENTAL. Weighting parameter for focal loss, positive
1052
- samples weighted by alpha, negative samples by (1-alpha). The default 0.5
1053
- value means no active class-level weighting. Only used with focal loss
1054
- i.e. `loss="BINARY_FOCAL_LOSS"` Default: 0.5.
1055
- focal_loss_gamma: EXPERIMENTAL. Exponent of the misprediction exponent term
1056
- in focal loss, corresponds to gamma parameter in
1054
+ focal_loss_alpha: EXPERIMENTAL, default 0.5. Weighting parameter for focal
1055
+ loss, positive samples weighted by alpha, negative samples by (1-alpha).
1056
+ The default 0.5 value means no active class-level weighting. Only used
1057
+ with focal loss i.e. `loss="BINARY_FOCAL_LOSS"`. Default: 0.5.
1058
+ focal_loss_gamma: EXPERIMENTAL, default 2.0. Exponent of the misprediction
1059
+ exponent term in focal loss, corresponds to gamma parameter in
1057
1060
https://arxiv.org/pdf/1708.02002.pdf. Only used with focal loss i.e.
1058
1061
`loss="BINARY_FOCAL_LOSS"` Default: 2.0.
1059
1062
forest_extraction: How to construct the forest: - MART: For Multiple
@@ -1122,12 +1125,13 @@ class GradientBoostedTreesModel(core.CoreModel):
1122
1125
likelihood loss. Mainly used for counting problems. Only valid for
1123
1126
regression. - `MULTINOMIAL_LOG_LIKELIHOOD`: Multinomial log likelihood
1124
1127
i.e. cross-entropy. Only valid for binary or multi-class classification. -
1125
- `LAMBDA_MART_NDCG5 `: LambdaMART with NDCG5 . - `XE_NDCG_MART`: Cross
1128
+ `LAMBDA_MART_NDCG`: LambdaMART with NDCG@5. - `XE_NDCG_MART`: Cross
1126
1129
Entropy Loss NDCG. See arxiv.org/abs/1911.09798. - `BINARY_FOCAL_LOSS`:
1127
1130
Focal loss. Only valid for binary classification. See
1128
1131
https://arxiv.org/pdf/1708.02002.pdf. - `POISSON`: Poisson log likelihood.
1129
1132
Only valid for regression. - `MEAN_AVERAGE_ERROR`: Mean average error
1130
- a.k.a. MAE.
1133
+ a.k.a. MAE. - `LAMBDA_MART_NDCG5`: DEPRECATED, use LAMBDA_MART_NDCG.
1134
+ LambdaMART with NDCG@5.
1131
1135
Default: "DEFAULT".
1132
1136
max_depth: Maximum depth of the tree. `max_depth=1` means that all trees
1133
1137
will be roots. `max_depth=-1` means that tree depth is not restricted by
@@ -1170,6 +1174,8 @@ class GradientBoostedTreesModel(core.CoreModel):
1170
1174
et al. in "Random Survival Forests"
1171
1175
(https://projecteuclid.org/download/pdfview_1/euclid.aoas/1223908043).
1172
1176
Default: "GLOBAL_IMPUTATION".
1177
+ ndcg_truncation: Truncation of the NDCG loss (default 5). Only used with
1178
+ NDCG loss i.e. `loss="LAMBDA_MART_NDCG"`. Default: 5.
1173
1179
num_candidate_attributes: Number of unique valid attributes tested for each
1174
1180
node. An attribute is valid if it has at least a valid split. If
1175
1181
`num_candidate_attributes=0`, the value is set to the classical default
@@ -1266,7 +1272,7 @@ class GradientBoostedTreesModel(core.CoreModel):
1266
1272
split_axis: What structure of split to consider for numerical features. -
1267
1273
`AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
1268
1274
is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
1269
- Sparse oblique splits (i.e. random splits one a small number of features)
1275
+ Sparse oblique splits (i.e. random splits on a small number of features)
1270
1276
from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
1271
1277
`MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
1272
1278
"Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
@@ -1336,6 +1342,7 @@ def __init__(
1336
1342
categorical_set_split_max_num_items : Optional [int ] = - 1 ,
1337
1343
categorical_set_split_min_item_frequency : Optional [int ] = 1 ,
1338
1344
compute_permutation_variable_importance : Optional [bool ] = False ,
1345
+ cross_entropy_ndcg_truncation : Optional [int ] = 5 ,
1339
1346
dart_dropout : Optional [float ] = None ,
1340
1347
early_stopping : Optional [str ] = "LOSS_INCREASE" ,
1341
1348
early_stopping_initial_iteration : Optional [int ] = 10 ,
@@ -1364,6 +1371,7 @@ def __init__(
1364
1371
mhld_oblique_sample_attributes : Optional [bool ] = None ,
1365
1372
min_examples : Optional [int ] = 5 ,
1366
1373
missing_value_policy : Optional [str ] = "GLOBAL_IMPUTATION" ,
1374
+ ndcg_truncation : Optional [int ] = 5 ,
1367
1375
num_candidate_attributes : Optional [int ] = - 1 ,
1368
1376
num_candidate_attributes_ratio : Optional [float ] = - 1.0 ,
1369
1377
num_trees : Optional [int ] = 300 ,
@@ -1407,6 +1415,7 @@ def __init__(
1407
1415
"compute_permutation_variable_importance" : (
1408
1416
compute_permutation_variable_importance
1409
1417
),
1418
+ "cross_entropy_ndcg_truncation" : cross_entropy_ndcg_truncation ,
1410
1419
"dart_dropout" : dart_dropout ,
1411
1420
"early_stopping" : early_stopping ,
1412
1421
"early_stopping_initial_iteration" : early_stopping_initial_iteration ,
@@ -1439,6 +1448,7 @@ def __init__(
1439
1448
"mhld_oblique_sample_attributes" : mhld_oblique_sample_attributes ,
1440
1449
"min_examples" : min_examples ,
1441
1450
"missing_value_policy" : missing_value_policy ,
1451
+ "ndcg_truncation" : ndcg_truncation ,
1442
1452
"num_candidate_attributes" : num_candidate_attributes ,
1443
1453
"num_candidate_attributes_ratio" : num_candidate_attributes_ratio ,
1444
1454
"num_trees" : num_trees ,
@@ -2369,7 +2379,7 @@ class RandomForestModel(core.CoreModel):
2369
2379
split_axis: What structure of split to consider for numerical features. -
2370
2380
`AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
2371
2381
is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
2372
- Sparse oblique splits (i.e. random splits one a small number of features)
2382
+ Sparse oblique splits (i.e. random splits on a small number of features)
2373
2383
from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
2374
2384
`MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
2375
2385
"Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
0 commit comments