Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] RF: Variable binning and other minor refactoring #4479

Merged
merged 29 commits into from
Feb 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
d1f5467
initial impl of variable-binning
venkywonka Dec 9, 2021
bcce385
quantile post-processing done in kernel
venkywonka Dec 14, 2021
fd0825d
Merge branch 'branch-22.02' into enh-rf-variable-binning
venkywonka Dec 14, 2021
3191b55
user-driven prototypes using constant memory
venkywonka Dec 21, 2021
6c16fbb
add multi-stream quantile sorting
venkywonka Dec 23, 2021
76ac2dd
clean dev code
venkywonka Dec 23, 2021
98d1122
cleaning dev code
venkywonka Dec 23, 2021
4c55450
clang format
venkywonka Dec 27, 2021
82c4839
Merge branch 'branch-22.02' into enh-rf-variable-binning
venkywonka Dec 30, 2021
bda8090
clang fix + nvtx update
venkywonka Jan 12, 2022
8592673
remove commented code + revert CMakeLists
venkywonka Jan 13, 2022
0cb1e49
use user-defined streams for sorting
venkywonka Jan 17, 2022
d18ddf0
Merge branch 'branch-22.02' into enh-rf-variable-binning
venkywonka Jan 17, 2022
c1b1636
review changes
venkywonka Jan 17, 2022
bd47b26
clang format
venkywonka Jan 17, 2022
eac966f
remove comments
venkywonka Jan 17, 2022
5d280cf
change name: 'input' to 'dataset'
venkywonka Jan 17, 2022
78f8cbe
delete input.h
venkywonka Jan 17, 2022
1159e95
remove stream-parallelism and other changes
venkywonka Jan 18, 2022
2b89113
Merge branch 'branch-22.02' into enh-rf-variable-binning
venkywonka Jan 21, 2022
cb7caa2
Merge branch 'branch-22.02' into enh-rf-variable-binning
venkywonka Jan 21, 2022
22a04bd
Merge branch 'branch-22.04' into enh-rf-variable-binning
venkywonka Jan 24, 2022
3d49e4c
change the in/out params of
venkywonka Jan 24, 2022
c9d919c
copyright
venkywonka Jan 24, 2022
eb62fec
change to and other small fixes
venkywonka Jan 28, 2022
93161a9
copyright and include fix
venkywonka Jan 28, 2022
5a63535
review changes
venkywonka Feb 1, 2022
13567d0
adding tests, variable-renaming, review comment changes
venkywonka Feb 2, 2022
b8d92c0
add srand
venkywonka Feb 3, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/bench/sg/fil.cu
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ std::vector<Params> getInputs()
p.rf = set_rf_params(10, /*max_depth */
(1 << 20), /* max_leaves */
1.f, /* max_features */
32, /* n_bins */
32, /* max_n_bins */
3, /* min_samples_leaf */
3, /* min_samples_split */
0.0f, /* min_impurity_decrease */
Expand Down
2 changes: 1 addition & 1 deletion cpp/bench/sg/rf_classifier.cu
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ std::vector<Params> getInputs()
p.rf = set_rf_params(10, /*max_depth */
(1 << 20), /* max_leaves */
0.3, /* max_features */
32, /* n_bins */
32, /* max_n_bins */
3, /* min_samples_leaf */
3, /* min_samples_split */
0.0f, /* min_impurity_decrease */
Expand Down
2 changes: 1 addition & 1 deletion cpp/bench/sg/rf_regressor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ std::vector<RegParams> getInputs()
p.rf = set_rf_params(10, /*max_depth */
(1 << 20), /* max_leaves */
0.3, /* max_features */
32, /* n_bins */
32, /* max_n_bins */
3, /* min_samples_leaf */
3, /* min_samples_split */
0.0f, /* min_impurity_decrease */
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cuml/ensemble/randomforest.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -191,7 +191,7 @@ RF_metrics score(const raft::handle_t& user_handle,
RF_params set_rf_params(int max_depth,
int max_leaves,
float max_features,
int n_bins,
int max_n_bins,
int min_samples_leaf,
int min_samples_split,
float min_impurity_decrease,
Expand Down
12 changes: 6 additions & 6 deletions cpp/include/cuml/tree/decisiontree.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -40,9 +40,9 @@ struct DecisionTreeParams {
*/
float max_features;
/**
* Number of bins used by the split algorithm.
* Maximum number of bins used by the split algorithm per feature.
*/
int n_bins;
int max_n_bins;
/**
* The minimum number of samples (rows) in each leaf node.
*/
Expand Down Expand Up @@ -74,7 +74,7 @@ struct DecisionTreeParams {
* @param[in] cfg_max_depth: maximum tree depth; default -1
* @param[in] cfg_max_leaves: maximum leaves; default -1
* @param[in] cfg_max_features: maximum number of features; default 1.0f
* @param[in] cfg_n_bins: number of bins; default 8
* @param[in] cfg_max_n_bins: maximum number of bins; default 128
* @param[in] cfg_min_samples_leaf: min. rows in each leaf node; default 1
* @param[in] cfg_min_samples_split: min. rows needed to split an internal node;
* default 2
Expand All @@ -84,13 +84,13 @@ struct DecisionTreeParams {
* i.e., GINI for classification or MSE for regression
* @param[in] cfg_max_batch_size: Maximum number of nodes that can be processed
in a batch. This is used only for batched-level algo. Default
value 128.
value 4096.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch 👍

*/
void set_tree_params(DecisionTreeParams& params,
int cfg_max_depth = -1,
int cfg_max_leaves = -1,
float cfg_max_features = 1.0f,
int cfg_n_bins = 128,
int cfg_max_n_bins = 128,
int cfg_min_samples_leaf = 1,
int cfg_min_samples_split = 2,
float cfg_min_impurity_decrease = 0.0f,
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/decisiontree/batched-levelalgo/bins.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,9 +25,9 @@ struct CountBin {
HDI CountBin(int x_) : x(x_) {}
HDI CountBin() : x(0) {}

DI static void IncrementHistogram(CountBin* hist, int nbins, int b, int label)
DI static void IncrementHistogram(CountBin* hist, int n_bins, int b, int label)
{
auto offset = label * nbins + b;
auto offset = label * n_bins + b;
CountBin::AtomicAdd(hist + offset, {1});
}
DI static void AtomicAdd(CountBin* address, CountBin val) { atomicAdd(&address->x, val.x); }
Expand All @@ -51,7 +51,7 @@ struct AggregateBin {
HDI AggregateBin() : label_sum(0.0), count(0) {}
HDI AggregateBin(double label_sum, int count) : label_sum(label_sum), count(count) {}

DI static void IncrementHistogram(AggregateBin* hist, int nbins, int b, double label)
DI static void IncrementHistogram(AggregateBin* hist, int n_bins, int b, double label)
{
AggregateBin::AtomicAdd(hist + b, {label, 1});
}
Expand Down
Loading