@@ -41,6 +41,9 @@ GBDT::GBDT()
   average_output_ = false;
   tree_learner_ = nullptr;
   linear_tree_ = false;
+  gradients_pointer_ = nullptr;
+  hessians_pointer_ = nullptr;
+  boosting_on_gpu_ = false;
 }
 
 GBDT::~GBDT() {
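
Note: the three new initializers correspond to members that the rest of this patch reads throughout the file. A minimal sketch of the declarations it assumes follows; this is inferred from how the diff uses the members, not copied from `src/boosting/gbdt.h`, and the real vector types may be more elaborate (e.g., aligned allocators).

```cpp
// Sketch only: member shapes inferred from their uses in this diff.
std::vector<score_t> gradients_;        // host-side gradient buffer
std::vector<score_t> hessians_;         // host-side hessian buffer
score_t* gradients_pointer_ = nullptr;  // aliases gradients_.data() or CUDA memory
score_t* hessians_pointer_ = nullptr;   // aliases hessians_.data() or CUDA memory
bool boosting_on_gpu_ = false;          // true when the objective computes gradients on GPU
```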
@@ -95,9 +98,9 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
 
   is_constant_hessian_ = GetIsConstHessian(objective_function);
 
-  const bool boosting_on_gpu = objective_function_ != nullptr && objective_function_->IsCUDAObjective();
+  boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective();
   tree_learner_ = std::unique_ptr<TreeLearner>(TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type,
-                                                                              config_.get(), boosting_on_gpu));
+                                                                              config_.get(), boosting_on_gpu_));
 
   // init tree learner
   tree_learner_->Init(train_data_, is_constant_hessian_);
@@ -112,7 +115,7 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
 
 #ifdef USE_CUDA_EXP
   if (config_->device_type == std::string("cuda_exp")) {
-    train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu));
+    train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
   } else {
 #endif  // USE_CUDA_EXP
     train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
@@ -123,9 +126,14 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
   num_data_ = train_data_->num_data();
   // create buffer for gradients and Hessians
   if (objective_function_ != nullptr) {
-    size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+    const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
 #ifdef USE_CUDA_EXP
-    if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu) {
+    if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu_) {
+      if (gradients_pointer_ != nullptr) {
+        CHECK_NOTNULL(hessians_pointer_);
+        DeallocateCUDAMemory<score_t>(&gradients_pointer_, __FILE__, __LINE__);
+        DeallocateCUDAMemory<score_t>(&hessians_pointer_, __FILE__, __LINE__);
+      }
       AllocateCUDAMemory<score_t>(&gradients_pointer_, total_size, __FILE__, __LINE__);
       AllocateCUDAMemory<score_t>(&hessians_pointer_, total_size, __FILE__, __LINE__);
     } else {
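
The new null check before allocation prevents a device-memory leak when `Init()` runs on a `GBDT` object that already owns CUDA buffers (e.g., after an earlier `Init` or reset). A minimal standalone sketch of the same free-then-allocate idiom, using raw CUDA runtime calls in place of LightGBM's `AllocateCUDAMemory`/`DeallocateCUDAMemory` wrappers (which additionally record `__FILE__`/`__LINE__` for error reporting):

```cpp
#include <cuda_runtime.h>
#include <cstddef>

// Free-then-allocate guard: assumes *ptr is either nullptr or a pointer
// returned by a previous cudaMalloc, mirroring the diff's invariant that
// the gradient and hessian buffers are allocated and freed together.
template <typename T>
cudaError_t ReallocDeviceBuffer(T** ptr, size_t count) {
  if (*ptr != nullptr) {
    cudaFree(*ptr);  // avoid leaking the buffer from the previous Init()
    *ptr = nullptr;
  }
  return cudaMalloc(reinterpret_cast<void**>(ptr), count * sizeof(T));
}
```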
@@ -137,17 +145,14 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
 #ifdef USE_CUDA_EXP
     }
 #endif  // USE_CUDA_EXP
-#ifndef USE_CUDA_EXP
-  }
-#else  // USE_CUDA_EXP
-  } else {
-    if (config_->device_type == std::string("cuda_exp")) {
-      size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
-      AllocateCUDAMemory<score_t>(&gradients_pointer_, total_size, __FILE__, __LINE__);
-      AllocateCUDAMemory<score_t>(&hessians_pointer_, total_size, __FILE__, __LINE__);
-    }
+  } else if (config_->boosting == std::string("goss")) {
+    const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+    gradients_.resize(total_size);
+    hessians_.resize(total_size);
+    gradients_pointer_ = gradients_.data();
+    hessians_pointer_ = hessians_.data();
   }
-#endif  // USE_CUDA_EXP
+
   // get max feature index
   max_feature_idx_ = train_data_->num_total_features() - 1;
   // get label index
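
When `objective_function_` is `nullptr` (a customized objective supplied by the caller) and boosting is GOSS, the patch now backs `gradients_pointer_`/`hessians_pointer_` with host vectors instead of leaving them unset. The pattern is plain pointer aliasing into vector storage; the pointers stay valid only while the vectors are not resized again, which is why `TrainOneIter` later asserts `CHECK_EQ(gradients_pointer_, gradients_.data())`. A self-contained sketch of the pattern:

```cpp
#include <cstddef>
#include <vector>

// Aliasing host buffers behind raw pointers, as the GOSS branch does.
struct GradientBuffers {
  std::vector<float> gradients, hessians;
  float* gradients_ptr = nullptr;
  float* hessians_ptr = nullptr;

  void Allocate(size_t num_data, size_t num_tree_per_iteration) {
    const size_t total_size = num_data * num_tree_per_iteration;
    gradients.resize(total_size);
    hessians.resize(total_size);
    gradients_ptr = gradients.data();  // invalidated by any later resize
    hessians_ptr = hessians.data();
  }
};
```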
@@ -440,23 +445,36 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
     Boosting();
     gradients = gradients_pointer_;
     hessians = hessians_pointer_;
-#ifndef USE_CUDA_EXP
-  }
-#else  // USE_CUDA_EXP
   } else {
-    if (config_->device_type == std::string("cuda_exp")) {
-      const size_t total_size = static_cast<size_t>(num_data_ * num_class_);
-      CopyFromHostToCUDADevice<score_t>(gradients_pointer_, gradients, total_size, __FILE__, __LINE__);
-      CopyFromHostToCUDADevice<score_t>(hessians_pointer_, hessians, total_size, __FILE__, __LINE__);
+    // use customized objective function
+    CHECK(objective_function_ == nullptr);
+    if (config_->boosting == std::string("goss")) {
+      // need to copy customized gradients when using GOSS
+      int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
+      #pragma omp parallel for schedule(static)
+      for (int64_t i = 0; i < total_size; ++i) {
+        gradients_[i] = gradients[i];
+        hessians_[i] = hessians[i];
+      }
+      CHECK_EQ(gradients_pointer_, gradients_.data());
+      CHECK_EQ(hessians_pointer_, hessians_.data());
       gradients = gradients_pointer_;
       hessians = hessians_pointer_;
     }
   }
-#endif  // USE_CUDA_EXP
 
   // bagging logic
   Bagging(iter_);
 
+  if (gradients != nullptr && is_use_subset_ && bag_data_cnt_ < num_data_ && !boosting_on_gpu_ && config_->boosting != std::string("goss")) {
+    // allocate gradients_ and hessians_ to copy gradients when using a data subset
+    int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration_;
+    gradients_.resize(total_size);
+    hessians_.resize(total_size);
+    gradients_pointer_ = gradients_.data();
+    hessians_pointer_ = hessians_.data();
+  }
+
   bool should_continue = false;
   for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
     const size_t offset = static_cast<size_t>(cur_tree_id) * num_data_;
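
In `TrainOneIter`, externally supplied gradients are now copied into the internal buffers before `Bagging` runs, so that GOSS's subsequent sampling operates on LightGBM's own buffers rather than the caller's arrays (per the diff's comment, "need to copy customized gradients when using GOSS"). A standalone sketch of the parallel copy; `int64_t` indexing matches the diff and avoids overflow on large `num_data * num_tree_per_iteration`:

```cpp
#include <cstdint>
#include <vector>

// Parallel element-wise copy of user-provided gradients/hessians into
// internal buffers, mirroring the GOSS branch above. Requires OpenMP
// (compile with -fopenmp); compiles to a serial loop without it.
void CopyCustomGradients(const float* gradients, const float* hessians,
                         int64_t total_size,
                         std::vector<float>* grad_buf,
                         std::vector<float>* hess_buf) {
  #pragma omp parallel for schedule(static)
  for (int64_t i = 0; i < total_size; ++i) {
    (*grad_buf)[i] = gradients[i];
    (*hess_buf)[i] = hessians[i];
  }
}
```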
@@ -465,7 +483,7 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
     auto grad = gradients + offset;
     auto hess = hessians + offset;
     // need to copy gradients for bagging subset.
-    if (is_use_subset_ && bag_data_cnt_ < num_data_ && config_->device_type != std::string("cuda_exp")) {
+    if (is_use_subset_ && bag_data_cnt_ < num_data_ && !boosting_on_gpu_) {
       for (int i = 0; i < bag_data_cnt_; ++i) {
         gradients_pointer_[offset + i] = grad[bag_data_indices_[i]];
         hessians_pointer_[offset + i] = hess[bag_data_indices_[i]];
@@ -591,13 +609,12 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) {
 
 std::vector<double> GBDT::EvalOneMetric(const Metric* metric, const double* score) const {
 #ifdef USE_CUDA_EXP
-  const bool boosting_on_cuda = objective_function_ != nullptr && objective_function_->IsCUDAObjective();
   const bool evaluation_on_cuda = metric->IsCUDAMetric();
-  if ((boosting_on_cuda && evaluation_on_cuda) || (!boosting_on_cuda && !evaluation_on_cuda)) {
+  if ((boosting_on_gpu_ && evaluation_on_cuda) || (!boosting_on_gpu_ && !evaluation_on_cuda)) {
 #endif  // USE_CUDA_EXP
     return metric->Eval(score, objective_function_);
 #ifdef USE_CUDA_EXP
-  } else if (boosting_on_cuda && !evaluation_on_cuda) {
+  } else if (boosting_on_gpu_ && !evaluation_on_cuda) {
     const size_t total_size = static_cast<size_t>(num_data_) * static_cast<size_t>(num_tree_per_iteration_);
     if (total_size > host_score_.size()) {
       host_score_.resize(total_size, 0.0f);
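
`EvalOneMetric` now reuses the cached `boosting_on_gpu_` instead of re-deriving it on every call. The branch structure implements a device-match dispatch: when the scores already live where the metric runs, evaluate directly; when boosting is on the GPU but the metric is CPU-only, stage the scores through a reusable host buffer first (the branch shown above). A hedged sketch of that staging step, with raw `cudaMemcpy` standing in for LightGBM's copy wrapper:

```cpp
#include <cuda_runtime.h>
#include <vector>

// Stage device-resident scores into a reusable host buffer before a
// CPU metric evaluates them; the grow-only resize mirrors host_score_.
const double* StageScoresToHost(const double* device_score, size_t total_size,
                                std::vector<double>* host_score) {
  if (total_size > host_score->size()) {
    host_score->resize(total_size, 0.0);
  }
  cudaMemcpy(host_score->data(), device_score, total_size * sizeof(double),
             cudaMemcpyDeviceToHost);
  return host_score->data();
}
```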
@@ -804,17 +821,16 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
   }
   training_metrics_.shrink_to_fit();
 
-#ifdef USE_CUDA_EXP
-  const bool boosting_on_gpu = objective_function_ != nullptr && objective_function_->IsCUDAObjective();
-#endif  // USE_CUDA_EXP
+  boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective();
+  tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
 
   if (train_data != train_data_) {
     train_data_ = train_data;
     // not same training data, need reset score and others
     // create score tracker
 #ifdef USE_CUDA_EXP
     if (config_->device_type == std::string("cuda_exp")) {
-      train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu));
+      train_score_updater_.reset(new CUDAScoreUpdater(train_data_, num_tree_per_iteration_, boosting_on_gpu_));
     } else {
 #endif  // USE_CUDA_EXP
       train_score_updater_.reset(new ScoreUpdater(train_data_, num_tree_per_iteration_));
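
`ResetTrainingData` (and `ResetConfig` below) now recomputes the flag and pushes it into the tree learner through `ResetBoostingOnGPU`. The diff does not show that method's declaration; the following is an assumed interface sketch only, not the actual header:

```cpp
// Assumed shape of the TreeLearner hook this diff relies on; the real
// declaration lives in LightGBM's tree learner headers, not in this diff.
class TreeLearner {
 public:
  virtual ~TreeLearner() = default;
  // Informs the learner whether gradients/hessians will arrive in device
  // memory (CUDA objective) or host memory, so it can rebind its inputs.
  virtual void ResetBoostingOnGPU(bool /*boosting_on_gpu*/) {}
};
```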
@@ -834,9 +850,14 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
 
   // create buffer for gradients and hessians
   if (objective_function_ != nullptr) {
-    size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+    const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
 #ifdef USE_CUDA_EXP
-    if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu) {
+    if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu_) {
+      if (gradients_pointer_ != nullptr) {
+        CHECK_NOTNULL(hessians_pointer_);
+        DeallocateCUDAMemory<score_t>(&gradients_pointer_, __FILE__, __LINE__);
+        DeallocateCUDAMemory<score_t>(&hessians_pointer_, __FILE__, __LINE__);
+      }
       AllocateCUDAMemory<score_t>(&gradients_pointer_, total_size, __FILE__, __LINE__);
       AllocateCUDAMemory<score_t>(&hessians_pointer_, total_size, __FILE__, __LINE__);
     } else {
@@ -848,6 +869,12 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction*
 #ifdef USE_CUDA_EXP
     }
 #endif  // USE_CUDA_EXP
+  } else if (config_->boosting == std::string("goss")) {
+    const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+    gradients_.resize(total_size);
+    hessians_.resize(total_size);
+    gradients_pointer_ = gradients_.data();
+    hessians_pointer_ = hessians_.data();
   }
 
   max_feature_idx_ = train_data_->num_total_features() - 1;
@@ -879,6 +906,10 @@ void GBDT::ResetConfig(const Config* config) {
   if (tree_learner_ != nullptr) {
     tree_learner_->ResetConfig(new_config.get());
   }
+
+  boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective();
+  tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
+
   if (train_data_ != nullptr) {
     ResetBaggingConfig(new_config.get(), false);
   }
@@ -953,10 +984,16 @@ void GBDT::ResetBaggingConfig(const Config* config, bool is_change_dataset) {
       need_re_bagging_ = true;
 
       if (is_use_subset_ && bag_data_cnt_ < num_data_) {
-        if (objective_function_ == nullptr) {
-          size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+        // resize gradient vectors to copy the customized gradients for GOSS or bagging with subset
+        if (objective_function_ != nullptr) {
+          const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
 #ifdef USE_CUDA_EXP
-          if (config_->device_type == std::string("cuda_exp") && objective_function_ != nullptr && objective_function_->IsCUDAObjective()) {
+          if (config_->device_type == std::string("cuda_exp") && boosting_on_gpu_) {
+            if (gradients_pointer_ != nullptr) {
+              CHECK_NOTNULL(hessians_pointer_);
+              DeallocateCUDAMemory<score_t>(&gradients_pointer_, __FILE__, __LINE__);
+              DeallocateCUDAMemory<score_t>(&hessians_pointer_, __FILE__, __LINE__);
+            }
             AllocateCUDAMemory<score_t>(&gradients_pointer_, total_size, __FILE__, __LINE__);
             AllocateCUDAMemory<score_t>(&hessians_pointer_, total_size, __FILE__, __LINE__);
           } else {
@@ -968,6 +1005,12 @@ void GBDT::ResetBaggingConfig(const Config* config, bool is_change_dataset) {
 #ifdef USE_CUDA_EXP
           }
 #endif  // USE_CUDA_EXP
+        } else if (config_->boosting == std::string("goss")) {
+          const size_t total_size = static_cast<size_t>(num_data_) * num_tree_per_iteration_;
+          gradients_.resize(total_size);
+          hessians_.resize(total_size);
+          gradients_pointer_ = gradients_.data();
+          hessians_pointer_ = hessians_.data();
         }
       }
     } else {