
Commit d1d2f4e

rstz authored and copybara-github committed
Prepare release of TF-DF 1.11.0
PiperOrigin-RevId: 690589733
1 parent c3f2df3 commit d1d2f4e

8 files changed: +67 -37 lines changed

CHANGELOG.md

Lines changed: 9 additions & 1 deletion
@@ -1,10 +1,18 @@
 # Changelog
 
-## HEAD
+## 1.11.0 - 2024-10-28
+
+### Feature
+
+-   Renamed LAMBDA_MART_NDCG5 loss to LAMBDA_MART_NDCG. The old loss is still
+    available. The ndcg truncation can now be modified via a hyperparameter.
+-   Notify users about ydf during startup. This message can be disabled by
+    setting Environment variable TFDF_DISABLE_WELCOME_MESSAGE.
 
 ### Fix
 
 -   Some errors are now InvalidArgumentError instead of UnknownError.
+-   Fix compatibility with TF 2.18.0.
 
 ## 1.10.0 - 2024-08-21
 
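
The changelog entry above introduces a startup notice about ydf that can be disabled via an environment variable. A minimal sketch of how a user might silence it; the variable name comes from the changelog, but the accepted values are not documented there, so setting it to "1" before import is an assumption:

```python
# Hypothetical usage sketch: disable the ydf welcome message added in 1.11.0.
# Assumption: any non-empty value set before importing TF-DF disables the notice.
import os

os.environ["TFDF_DISABLE_WELCOME_MESSAGE"] = "1"  # set before importing TF-DF

import tensorflow_decision_forests as tfdf  # no welcome message expected
```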

WORKSPACE

Lines changed: 20 additions & 13 deletions
@@ -20,25 +20,28 @@ http_archive(
 # absl used by tensorflow.
 http_archive(
     name = "org_tensorflow",
-    strip_prefix = "tensorflow-2.17.0",
-    sha256 = "9cc4d5773b8ee910079baaecb4086d0c28939f024dd74b33fc5e64779b6533dc",
-    urls = ["https://github.com/tensorflow/tensorflow/archive/v2.17.0.tar.gz"],
+    sha256 = "d7876f4bb0235cac60eb6316392a7c48676729860da1ab659fb440379ad5186d",
+    strip_prefix = "tensorflow-2.18.0",
+    urls = ["https://github.com/tensorflow/tensorflow/archive/v2.18.0.tar.gz"],
 )
 
-
 load("//tensorflow_decision_forests:tensorflow_decision_forests.bzl", "py_deps_profile")
 
 py_deps_profile(
     name = "release_or_nightly",
-    requirements_in = "//configure:requirements.in",
-    pip_repo_name = "pypi",
     deps_map = {
-        "tensorflow": ["tf-nightly", "tf_header_lib", "libtensorflow_framework"],
-        "tf-keras": ["tf-keras-nightly"]
+        "tensorflow": [
+            "tf-nightly",
+            "tf_header_lib",
+            "libtensorflow_framework",
+        ],
+        "tf-keras": ["tf-keras-nightly"],
     },
+    pip_repo_name = "pypi",
+    requirements_in = "//configure:requirements.in",
     switch = {
-        "IS_NIGHTLY": "nightly"
-    }
+        "IS_NIGHTLY": "nightly",
+    },
 )
 
 # Initialize hermetic Python
@@ -49,12 +52,12 @@ python_init_rules()
 load("@org_tensorflow//third_party/py:python_init_repositories.bzl", "python_init_repositories")
 
 python_init_repositories(
+    default_python_version = "system",
     requirements = {
         "3.9": "//configure:requirements_lock_3_9.txt",
         "3.10": "//configure:requirements_lock_3_10.txt",
         "3.11": "//configure:requirements_lock_3_11.txt",
     },
-    default_python_version = "system",
 )
 
 load("@org_tensorflow//third_party/py:python_init_toolchains.bzl", "python_init_toolchains")
@@ -140,16 +143,20 @@ nccl_configure(name = "local_config_nccl")
 # ========================================
 
 # Third party libraries
-load("//third_party/absl_py:workspace.bzl", absl_py = "deps")
 load("//third_party/absl:workspace.bzl", absl = "deps")
+load("//third_party/absl_py:workspace.bzl", absl_py = "deps")
 load("//third_party/benchmark:workspace.bzl", benchmark = "deps")
 load("//third_party/gtest:workspace.bzl", gtest = "deps")
 load("//third_party/protobuf:workspace.bzl", protobuf = "deps")
 
 absl()
+
 absl_py()
+
 benchmark()
+
 gtest()
+
 protobuf()
 
 # Yggdrasil Decision Forests
@@ -170,7 +177,7 @@ ydf_load_deps(
         "pybind11",
         "pybind11_abseil",
         "pybind11_protobuf",
-        "tensorflow"
+        "tensorflow",
     ],
     repo_name = "@ydf",
 )

configure/setup.py

Lines changed: 2 additions & 2 deletions
@@ -23,15 +23,15 @@
 from setuptools.command.install import install
 from setuptools.dist import Distribution
 
-_VERSION = "1.10.0"
+_VERSION = "1.11.0"
 
 with open("README.md", "r", encoding="utf-8") as fh:
   long_description = fh.read()
 
 REQUIRED_PACKAGES = [
     "numpy",
     "pandas",
-    "tensorflow==2.17.0",
+    "tensorflow==2.18.0",
     "six",
     "absl_py",
     "wheel",

documentation/known_issues.md

Lines changed: 10 additions & 5 deletions
@@ -1,11 +1,15 @@
 # Known Issues
 
-The underlying engine behind the decision forests algorithms used by TensorFlow
-Decision Forests have been extensively production-tested. This file lists some
-of the known issues.
+## Prefer YDF for new projects
 
-See also the [migration guide](migration.md) for behavior that is different from
-other algorithms.
+[YDF](https://github.com/google/yggdrasil-decision-forests) is Google's new
+library to train Decision Forests.
+
+YDF extends the power of TF-DF, offering new features, a simplified API, faster
+training times, updated documentation, and enhanced compatibility with popular
+ML libraries.
+
+Some of the issues mentioned below are fixed in YDF.
 
 ## Windows Pip package is not available
 
@@ -54,6 +58,7 @@ The following table shows the compatibility between
 
 tensorflow_decision_forests | tensorflow
 --------------------------- | ---------------
+1.11.0                      | 2.18.0
 1.10.0                      | 2.17.0
 1.9.2                       | 2.16.2
 1.9.1                       | 2.16.1
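
The new "Prefer YDF for new projects" section points at a simplified API. A short, hedged sketch of what that API looks like, based on the google/yggdrasil-decision-forests documentation rather than anything in this commit; the dataset path and label column are placeholders:

```python
# Illustrative only: train a gradient boosted trees model with the ydf package.
import pandas as pd
import ydf  # installed separately, e.g. the "ydf" PyPI package

train_df = pd.read_csv("train.csv")  # placeholder dataset with a "label" column
model = ydf.GradientBoostedTreesLearner(label="label").train(train_df)
print(model.describe())  # summary of the trained model
```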

tensorflow_decision_forests/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -51,10 +51,10 @@
 ```
 """
 
-__version__ = "1.10.0"
+__version__ = "1.11.0"
 __author__ = "Mathieu Guillame-Bert"
 
-compatible_tf_versions = ["2.17.0"]
+compatible_tf_versions = ["2.18.0"]
 __git_version__ = "HEAD"  # Modify for release build.
 
 from tensorflow_decision_forests.tensorflow import check_version
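
Since `compatible_tf_versions` is a plain module attribute, a quick way to confirm that an installed TF-DF 1.11.0 is paired with the TensorFlow version it was built against (a small sketch, not part of this commit; TF-DF's own `check_version` performs a similar check at import time):

```python
# Sanity-check the TF / TF-DF pairing listed in the compatibility table.
import tensorflow as tf
import tensorflow_decision_forests as tfdf

print(tfdf.__version__, tf.__version__)  # expect 1.11.0 and 2.18.x
if tf.__version__ not in tfdf.compatible_tf_versions:
    print("Warning: this TF-DF build was not tested against this TensorFlow.")
```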

tensorflow_decision_forests/keras/wrappers_pre_generated.py

Lines changed: 21 additions & 11 deletions
@@ -359,7 +359,7 @@ class CartModel(core.CoreModel):
     split_axis: What structure of split to consider for numerical features. -
       `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
       is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
-      Sparse oblique splits (i.e. random splits one a small number of features)
+      Sparse oblique splits (i.e. random splits on a small number of features)
       from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
       `MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
       "Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
@@ -1030,6 +1030,9 @@ class GradientBoostedTreesModel(core.CoreModel):
       variable importance of the model at the end of the training using the
       validation dataset. Enabling this feature can increase the training time
       significantly. Default: False.
+    cross_entropy_ndcg_truncation: Truncation of the cross-entropy NDCG loss
+      (default 5). Only used with cross-entropy NDCG loss i.e.
+      `loss="XE_NDCG_MART"` Default: 5.
     dart_dropout: Dropout rate applied when using the DART i.e. when
       forest_extraction=DART. Default: None.
     early_stopping: Early stopping detects the overfitting of the model and
@@ -1048,12 +1051,12 @@ class GradientBoostedTreesModel(core.CoreModel):
       Default: 10.
     early_stopping_num_trees_look_ahead: Rolling number of trees used to detect
       validation loss increase and trigger early stopping. Default: 30.
-    focal_loss_alpha: EXPERIMENTAL. Weighting parameter for focal loss, positive
-      samples weighted by alpha, negative samples by (1-alpha). The default 0.5
-      value means no active class-level weighting. Only used with focal loss
-      i.e. `loss="BINARY_FOCAL_LOSS"` Default: 0.5.
-    focal_loss_gamma: EXPERIMENTAL. Exponent of the misprediction exponent term
-      in focal loss, corresponds to gamma parameter in
+    focal_loss_alpha: EXPERIMENTAL, default 0.5. Weighting parameter for focal
+      loss, positive samples weighted by alpha, negative samples by (1-alpha).
+      The default 0.5 value means no active class-level weighting. Only used
+      with focal loss i.e. `loss="BINARY_FOCAL_LOSS"` Default: 0.5.
+    focal_loss_gamma: EXPERIMENTAL, default 2.0. Exponent of the misprediction
+      exponent term in focal loss, corresponds to gamma parameter in
       https://arxiv.org/pdf/1708.02002.pdf. Only used with focal loss i.e.
       `loss="BINARY_FOCAL_LOSS"` Default: 2.0.
     forest_extraction: How to construct the forest: - MART: For Multiple
@@ -1122,12 +1125,13 @@ class GradientBoostedTreesModel(core.CoreModel):
       likelihood loss. Mainly used for counting problems. Only valid for
       regression. - `MULTINOMIAL_LOG_LIKELIHOOD`: Multinomial log likelihood
       i.e. cross-entropy. Only valid for binary or multi-class classification. -
-      `LAMBDA_MART_NDCG5`: LambdaMART with NDCG5. - `XE_NDCG_MART`: Cross
+      `LAMBDA_MART_NDCG`: LambdaMART with NDCG@5. - `XE_NDCG_MART`: Cross
       Entropy Loss NDCG. See arxiv.org/abs/1911.09798. - `BINARY_FOCAL_LOSS`:
       Focal loss. Only valid for binary classification. See
       https://arxiv.org/pdf/1708.02002.pdf. - `POISSON`: Poisson log likelihood.
       Only valid for regression. - `MEAN_AVERAGE_ERROR`: Mean average error
-      a.k.a. MAE.
+      a.k.a. MAE. - `LAMBDA_MART_NDCG5`: DEPRECATED, use LAMBDA_MART_NDCG.
+      LambdaMART with NDCG@5.
         Default: "DEFAULT".
     max_depth: Maximum depth of the tree. `max_depth=1` means that all trees
       will be roots. `max_depth=-1` means that tree depth is not restricted by
@@ -1170,6 +1174,8 @@ class GradientBoostedTreesModel(core.CoreModel):
       et al. in "Random Survival Forests"
       (https://projecteuclid.org/download/pdfview_1/euclid.aoas/1223908043).
       Default: "GLOBAL_IMPUTATION".
+    ndcg_truncation: Truncation of the NDCG loss (default 5). Only used with
+      NDCG loss i.e. `loss="LAMBDA_MART_NDCG". ` Default: 5.
     num_candidate_attributes: Number of unique valid attributes tested for each
       node. An attribute is valid if it has at least a valid split. If
       `num_candidate_attributes=0`, the value is set to the classical default
@@ -1266,7 +1272,7 @@ class GradientBoostedTreesModel(core.CoreModel):
     split_axis: What structure of split to consider for numerical features. -
       `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
       is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
-      Sparse oblique splits (i.e. random splits one a small number of features)
+      Sparse oblique splits (i.e. random splits on a small number of features)
       from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
       `MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
       "Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes
@@ -1336,6 +1342,7 @@ def __init__(
       categorical_set_split_max_num_items: Optional[int] = -1,
       categorical_set_split_min_item_frequency: Optional[int] = 1,
       compute_permutation_variable_importance: Optional[bool] = False,
+      cross_entropy_ndcg_truncation: Optional[int] = 5,
       dart_dropout: Optional[float] = None,
       early_stopping: Optional[str] = "LOSS_INCREASE",
       early_stopping_initial_iteration: Optional[int] = 10,
@@ -1364,6 +1371,7 @@ def __init__(
      mhld_oblique_sample_attributes: Optional[bool] = None,
       min_examples: Optional[int] = 5,
       missing_value_policy: Optional[str] = "GLOBAL_IMPUTATION",
+      ndcg_truncation: Optional[int] = 5,
       num_candidate_attributes: Optional[int] = -1,
       num_candidate_attributes_ratio: Optional[float] = -1.0,
       num_trees: Optional[int] = 300,
@@ -1407,6 +1415,7 @@ def __init__(
         "compute_permutation_variable_importance": (
             compute_permutation_variable_importance
         ),
+        "cross_entropy_ndcg_truncation": cross_entropy_ndcg_truncation,
         "dart_dropout": dart_dropout,
         "early_stopping": early_stopping,
         "early_stopping_initial_iteration": early_stopping_initial_iteration,
@@ -1439,6 +1448,7 @@ def __init__(
         "mhld_oblique_sample_attributes": mhld_oblique_sample_attributes,
         "min_examples": min_examples,
         "missing_value_policy": missing_value_policy,
+        "ndcg_truncation": ndcg_truncation,
         "num_candidate_attributes": num_candidate_attributes,
         "num_candidate_attributes_ratio": num_candidate_attributes_ratio,
         "num_trees": num_trees,
@@ -2369,7 +2379,7 @@ class RandomForestModel(core.CoreModel):
     split_axis: What structure of split to consider for numerical features. -
       `AXIS_ALIGNED`: Axis aligned splits (i.e. one condition at a time). This
       is the "classical" way to train a tree. Default value. - `SPARSE_OBLIQUE`:
-      Sparse oblique splits (i.e. random splits one a small number of features)
+      Sparse oblique splits (i.e. random splits on a small number of features)
       from "Sparse Projection Oblique Random Forests", Tomita et al., 2020. -
       `MHLD_OBLIQUE`: Multi-class Hellinger Linear Discriminant splits from
       "Classification Based on Multivariate Contrast Patterns", Canete-Sifuentes

tools/start_compile_docker.sh

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@
 # directory.
 TFDF_DIRNAME=${PWD##*/}
 
-DOCKER_IMAGE=tensorflow/build:2.17-python3.9
+DOCKER_IMAGE=tensorflow/build:2.18-python3.9
 DOCKER_CONTAINER=compile_tfdf
 
 echo "Available containers:"

tools/test_bazel.sh

Lines changed: 2 additions & 2 deletions
@@ -26,7 +26,7 @@
 #
 # Usage example
 #
-# RUN_TESTS=1 PY_VERSION=3.9 TF_VERSION=2.16.2 ./tools/test_bazel.sh
+# RUN_TESTS=1 PY_VERSION=3.9 TF_VERSION=2.18.0 ./tools/test_bazel.sh
 
 set -vex
 
@@ -90,7 +90,7 @@ commit_slug=$(curl -s "https://api.github.com/repos/tensorflow/tensorflow/commit
 # Update TF dependency to the chosen version
 sed -E -i "s/strip_prefix = \"tensorflow-2\.[0-9]+(\.[0-9]+)*(-rc[0-9]+)?\",/strip_prefix = \"tensorflow-${commit_slug}\",/" WORKSPACE
 sed -E -i "s|\"https://github.com/tensorflow/tensorflow/archive/v.+\.tar.gz\"|\"https://github.com/tensorflow/tensorflow/archive/${commit_slug}.tar.gz\"|" WORKSPACE
-prev_shasum=$(grep -A 1 -e "strip_prefix.*tensorflow-" WORKSPACE | tail -1 | awk -F '"' '{print $2}')
+prev_shasum=$(grep -B 1 -e "strip_prefix.*tensorflow-" WORKSPACE | head -1 | awk -F '"' '{print $2}')
 sed -i "s/sha256 = \"${prev_shasum}\",//" WORKSPACE
 
 # Get build configuration for chosen version.
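
The last hunk adapts the sha256 lookup to the reordered WORKSPACE: after this commit, `sha256 = ...` sits on the line before `strip_prefix = "tensorflow-..."` (see the WORKSPACE diff earlier), so the script greps the preceding line (`-B 1 | head -1`) instead of the following one (`-A 1 | tail -1`). A small Python sketch of the same lookup, included only to illustrate the before/after logic; the real script stays in shell:

```python
# Mirror of the shell change: read the line *before* the strip_prefix entry
# (grep -B 1 | head -1) instead of the line after it (grep -A 1 | tail -1).
import re
from typing import Optional

def find_tf_sha256(workspace_text: str) -> Optional[str]:
    lines = workspace_text.splitlines()
    for i, line in enumerate(lines):
        if "strip_prefix" in line and "tensorflow-" in line and i > 0:
            match = re.search(r'sha256 = "([0-9a-f]{64})"', lines[i - 1])
            return match.group(1) if match else None
    return None
```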
