forked from microsoft/LightGBM
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CUDA] Initial work for boosting and evaluation with CUDA (microsoft#…
…5279) * initial work for boosting and evaluation with CUDA * fix compatibility with CPU code * fix creating objective without USE_CUDA_EXP * fix static analysis errors * fix static analysis errors
- Loading branch information
Showing
22 changed files
with
869 additions
and
177 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
/*! | ||
* Copyright (c) 2021 Microsoft Corporation. All rights reserved. | ||
* Licensed under the MIT License. See LICENSE file in the project root for license information. | ||
*/ | ||
|
||
#include "cuda_score_updater.hpp" | ||
|
||
#ifdef USE_CUDA_EXP | ||
|
||
namespace LightGBM { | ||
|
||
CUDAScoreUpdater::CUDAScoreUpdater(const Dataset* data, int num_tree_per_iteration, const bool boosting_on_cuda): | ||
ScoreUpdater(data, num_tree_per_iteration), num_threads_per_block_(1024), boosting_on_cuda_(boosting_on_cuda) { | ||
num_data_ = data->num_data(); | ||
int64_t total_size = static_cast<int64_t>(num_data_) * num_tree_per_iteration; | ||
InitCUDA(total_size); | ||
has_init_score_ = false; | ||
const double* init_score = data->metadata().init_score(); | ||
// if exists initial score, will start from it | ||
if (init_score != nullptr) { | ||
if ((data->metadata().num_init_score() % num_data_) != 0 | ||
|| (data->metadata().num_init_score() / num_data_) != num_tree_per_iteration) { | ||
Log::Fatal("Number of class for initial score error"); | ||
} | ||
has_init_score_ = true; | ||
CopyFromHostToCUDADevice<double>(cuda_score_, init_score, total_size, __FILE__, __LINE__); | ||
} else { | ||
SetCUDAMemory<double>(cuda_score_, 0, static_cast<size_t>(num_data_), __FILE__, __LINE__); | ||
} | ||
SynchronizeCUDADevice(__FILE__, __LINE__); | ||
if (boosting_on_cuda_) { | ||
// clear host score buffer | ||
score_.clear(); | ||
score_.shrink_to_fit(); | ||
} | ||
} | ||
|
||
void CUDAScoreUpdater::InitCUDA(const size_t total_size) { | ||
AllocateCUDAMemory<double>(&cuda_score_, total_size, __FILE__, __LINE__); | ||
} | ||
|
||
CUDAScoreUpdater::~CUDAScoreUpdater() { | ||
DeallocateCUDAMemory<double>(&cuda_score_, __FILE__, __LINE__); | ||
} | ||
|
||
inline void CUDAScoreUpdater::AddScore(double val, int cur_tree_id) { | ||
Common::FunctionTimer fun_timer("CUDAScoreUpdater::AddScore", global_timer); | ||
const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id; | ||
LaunchAddScoreConstantKernel(val, offset); | ||
if (!boosting_on_cuda_) { | ||
CopyFromCUDADeviceToHost<double>(score_.data() + offset, cuda_score_ + offset, static_cast<size_t>(num_data_), __FILE__, __LINE__); | ||
} | ||
} | ||
|
||
inline void CUDAScoreUpdater::AddScore(const Tree* tree, int cur_tree_id) { | ||
Common::FunctionTimer fun_timer("ScoreUpdater::AddScore", global_timer); | ||
const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id; | ||
tree->AddPredictionToScore(data_, num_data_, cuda_score_ + offset); | ||
if (!boosting_on_cuda_) { | ||
CopyFromCUDADeviceToHost<double>(score_.data() + offset, cuda_score_ + offset, static_cast<size_t>(num_data_), __FILE__, __LINE__); | ||
} | ||
} | ||
|
||
inline void CUDAScoreUpdater::AddScore(const TreeLearner* tree_learner, const Tree* tree, int cur_tree_id) { | ||
Common::FunctionTimer fun_timer("ScoreUpdater::AddScore", global_timer); | ||
const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id; | ||
tree_learner->AddPredictionToScore(tree, cuda_score_ + offset); | ||
if (!boosting_on_cuda_) { | ||
CopyFromCUDADeviceToHost<double>(score_.data() + offset, cuda_score_ + offset, static_cast<size_t>(num_data_), __FILE__, __LINE__); | ||
} | ||
} | ||
|
||
inline void CUDAScoreUpdater::AddScore(const Tree* tree, const data_size_t* data_indices, | ||
data_size_t data_cnt, int cur_tree_id) { | ||
Common::FunctionTimer fun_timer("ScoreUpdater::AddScore", global_timer); | ||
const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id; | ||
tree->AddPredictionToScore(data_, data_indices, data_cnt, cuda_score_ + offset); | ||
if (!boosting_on_cuda_) { | ||
CopyFromCUDADeviceToHost<double>(score_.data() + offset, cuda_score_ + offset, static_cast<size_t>(num_data_), __FILE__, __LINE__); | ||
} | ||
} | ||
|
||
inline void CUDAScoreUpdater::MultiplyScore(double val, int cur_tree_id) { | ||
Common::FunctionTimer fun_timer("CUDAScoreUpdater::MultiplyScore", global_timer); | ||
const size_t offset = static_cast<size_t>(num_data_) * cur_tree_id; | ||
LaunchMultiplyScoreConstantKernel(val, offset); | ||
if (!boosting_on_cuda_) { | ||
CopyFromCUDADeviceToHost<double>(score_.data() + offset, cuda_score_ + offset, static_cast<size_t>(num_data_), __FILE__, __LINE__); | ||
} | ||
} | ||
|
||
} // namespace LightGBM | ||
|
||
#endif // USE_CUDA_EXP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/*! | ||
* Copyright (c) 2021 Microsoft Corporation. All rights reserved. | ||
* Licensed under the MIT License. See LICENSE file in the project root for license information. | ||
*/ | ||
|
||
#include "cuda_score_updater.hpp" | ||
|
||
#ifdef USE_CUDA_EXP | ||
|
||
namespace LightGBM { | ||
|
||
__global__ void AddScoreConstantKernel( | ||
const double val, | ||
const size_t offset, | ||
const data_size_t num_data, | ||
double* score) { | ||
const data_size_t data_index = static_cast<data_size_t>(threadIdx.x + blockIdx.x * blockDim.x); | ||
if (data_index < num_data) { | ||
score[data_index + offset] += val; | ||
} | ||
} | ||
|
||
void CUDAScoreUpdater::LaunchAddScoreConstantKernel(const double val, const size_t offset) { | ||
const int num_blocks = (num_data_ + num_threads_per_block_) / num_threads_per_block_; | ||
Log::Warning("adding init score = %f", val); | ||
AddScoreConstantKernel<<<num_blocks, num_threads_per_block_>>>(val, offset, num_data_, cuda_score_); | ||
} | ||
|
||
__global__ void MultiplyScoreConstantKernel( | ||
const double val, | ||
const size_t offset, | ||
const data_size_t num_data, | ||
double* score) { | ||
const data_size_t data_index = static_cast<data_size_t>(threadIdx.x + blockIdx.x * blockDim.x); | ||
if (data_index < num_data) { | ||
score[data_index] *= val; | ||
} | ||
} | ||
|
||
void CUDAScoreUpdater::LaunchMultiplyScoreConstantKernel(const double val, const size_t offset) { | ||
const int num_blocks = (num_data_ + num_threads_per_block_) / num_threads_per_block_; | ||
MultiplyScoreConstantKernel<<<num_blocks, num_threads_per_block_>>>(val, offset, num_data_, cuda_score_); | ||
} | ||
|
||
} // namespace LightGBM | ||
|
||
#endif // USE_CUDA_EXP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
/*! | ||
* Copyright (c) 2021 Microsoft Corporation. All rights reserved. | ||
* Licensed under the MIT License. See LICENSE file in the project root for license information. | ||
*/ | ||
|
||
#ifndef LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ | ||
#define LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ | ||
|
||
#ifdef USE_CUDA_EXP | ||
|
||
#include <LightGBM/cuda/cuda_utils.h> | ||
|
||
#include "../score_updater.hpp" | ||
|
||
namespace LightGBM { | ||
|
||
class CUDAScoreUpdater: public ScoreUpdater { | ||
public: | ||
CUDAScoreUpdater(const Dataset* data, int num_tree_per_iteration, const bool boosting_on_cuda); | ||
|
||
~CUDAScoreUpdater(); | ||
|
||
inline void AddScore(double val, int cur_tree_id) override; | ||
|
||
inline void AddScore(const Tree* tree, int cur_tree_id) override; | ||
|
||
inline void AddScore(const TreeLearner* tree_learner, const Tree* tree, int cur_tree_id) override; | ||
|
||
inline void AddScore(const Tree* tree, const data_size_t* data_indices, | ||
data_size_t data_cnt, int cur_tree_id) override; | ||
|
||
inline void MultiplyScore(double val, int cur_tree_id) override; | ||
|
||
inline const double* score() const override { | ||
if (boosting_on_cuda_) { | ||
return cuda_score_; | ||
} else { | ||
return score_.data(); | ||
} | ||
} | ||
|
||
/*! \brief Disable copy */ | ||
CUDAScoreUpdater& operator=(const CUDAScoreUpdater&) = delete; | ||
|
||
CUDAScoreUpdater(const CUDAScoreUpdater&) = delete; | ||
|
||
private: | ||
void InitCUDA(const size_t total_size); | ||
|
||
void LaunchAddScoreConstantKernel(const double val, const size_t offset); | ||
|
||
void LaunchMultiplyScoreConstantKernel(const double val, const size_t offset); | ||
|
||
double* cuda_score_; | ||
|
||
const int num_threads_per_block_; | ||
|
||
const bool boosting_on_cuda_; | ||
}; | ||
|
||
} // namespace LightGBM | ||
|
||
#endif // USE_CUDA_EXP | ||
|
||
#endif // LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ |
Oops, something went wrong.