[CUDA] Add L2 metric for new CUDA version (microsoft#5633)
* add rmse metric for new cuda version

* add Init for CUDAMetricInterface

* fix lint errors

* fix rmse and add l2 metric for new cuda version

* use CUDAL2Metric

* explicit template instantiation

* write result only with the first thread

* pre allocate buffer for output converting

* fix l2 regression with cuda metric evaluation

* weighting loss in cuda metric evaluation

* mark CUDATree::AsConstantTree as override
shiyu1994 authored Dec 27, 2022
1 parent 36055d4 commit 6482b47
Showing 12 changed files with 58 additions and 15 deletions.
2 changes: 2 additions & 0 deletions include/LightGBM/cuda/cuda_tree.hpp
@@ -77,6 +77,8 @@ class CUDATree : public Tree {
                             const data_size_t* used_data_indices,
                             data_size_t num_data, double* score) const override;
 
+  inline void AsConstantTree(double val) override;
+
   const int* cuda_leaf_parent() const { return cuda_leaf_parent_; }
 
   const int* cuda_left_child() const { return cuda_left_child_; }
3 changes: 3 additions & 0 deletions include/LightGBM/cuda/cuda_utils.h
@@ -119,6 +119,9 @@ class CUDAVector {
   }
 
   void Resize(size_t size) {
+    if (size == size_) {
+      return;
+    }
     if (size == 0) {
       Clear();
     }
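Note on the hunk above: the early return makes Resize a no-op when the requested size already matches, which matters because the metric's Eval (further below) now calls Resize on every evaluation. A self-contained sketch of the pattern, using plain malloc/free as a stand-in for the real class's CUDA allocation helpers:

#include <cstddef>
#include <cstdlib>

// Illustrative stand-in for CUDAVector<double>: Resize reallocates only when
// the size actually changes, so calling it once per evaluation is cheap.
struct DeviceBufferSketch {
  double* data = nullptr;
  std::size_t size = 0;
  void Resize(std::size_t new_size) {
    if (new_size == size) return;  // the new guard: repeated identical calls are free
    std::free(data);               // the real class frees/allocates device memory instead
    data = (new_size == 0) ? nullptr
                           : static_cast<double*>(std::malloc(new_size * sizeof(double)));
    size = new_size;
  }
  ~DeviceBufferSketch() { std::free(data); }
};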
5 changes: 4 additions & 1 deletion include/LightGBM/objective_function.h
@@ -101,9 +101,12 @@ class ObjectiveFunction {
   /*!
    * \brief Convert output for CUDA version
    */
-  const double* ConvertOutputCUDA(data_size_t /*num_data*/, const double* input, double* /*output*/) const {
+  virtual const double* ConvertOutputCUDA(data_size_t /*num_data*/, const double* input, double* /*output*/) const {
     return input;
   }
+
+  virtual bool NeedConvertOutputCUDA() const { return false; }
+
 #endif  // USE_CUDA_EXP
 };

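The two virtuals form a query-then-convert protocol: the base class reports that no conversion is needed and passes scores through untouched, while objectives whose raw scores are not in output space override both. A minimal sketch of the shape of the interface (names match the diff; the surrounding class and typedefs are elided):

// Sketch of the hook pattern added above. Default: identity, no buffer needed.
struct ObjectiveSketch {
  virtual ~ObjectiveSketch() {}
  virtual const double* ConvertOutputCUDA(int /*num_data*/, const double* input,
                                          double* /*output*/) const { return input; }
  virtual bool NeedConvertOutputCUDA() const { return false; }
};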
2 changes: 1 addition & 1 deletion include/LightGBM/tree.h
@@ -228,7 +228,7 @@ class Tree {
     shrinkage_ = 1.0f;
   }
 
-  inline void AsConstantTree(double val) {
+  virtual inline void AsConstantTree(double val) {
     num_leaves_ = 1;
     shrinkage_ = 1.0f;
     leaf_value_[0] = val;
4 changes: 4 additions & 0 deletions src/io/cuda/cuda_tree.cpp
@@ -330,6 +330,10 @@ void CUDATree::SyncLeafOutputFromCUDAToHost() {
   CopyFromCUDADeviceToHost<double>(leaf_value_.data(), cuda_leaf_value_, leaf_value_.size(), __FILE__, __LINE__);
 }
 
+void CUDATree::AsConstantTree(double val) {
+  Tree::AsConstantTree(val);
+  CopyFromHostToCUDADevice<double>(cuda_leaf_value_, &val, 1, __FILE__, __LINE__);
+}
 
 }  // namespace LightGBM
8 changes: 7 additions & 1 deletion src/metric/cuda/cuda_regression_metric.cpp
@@ -31,13 +31,19 @@ void CUDARegressionMetricInterface<HOST_METRIC, CUDA_METRIC>::Init(const Metadat

 template <typename HOST_METRIC, typename CUDA_METRIC>
 std::vector<double> CUDARegressionMetricInterface<HOST_METRIC, CUDA_METRIC>::Eval(const double* score, const ObjectiveFunction* objective) const {
-  const double* score_convert = objective->ConvertOutputCUDA(this->num_data_, score, score_convert_buffer_.RawData());
+  const double* score_convert = score;
+  if (objective != nullptr && objective->NeedConvertOutputCUDA()) {
+    score_convert_buffer_.Resize(static_cast<size_t>(this->num_data_) * static_cast<size_t>(this->num_class_));
+    score_convert = objective->ConvertOutputCUDA(this->num_data_, score, score_convert_buffer_.RawData());
+  }
   const double eval_score = LaunchEvalKernel(score_convert);
   return std::vector<double>{eval_score};
 }
 
 CUDARMSEMetric::CUDARMSEMetric(const Config& config): CUDARegressionMetricInterface<RMSEMetric, CUDARMSEMetric>(config) {}
 
+CUDAL2Metric::CUDAL2Metric(const Config& config): CUDARegressionMetricInterface<L2Metric, CUDAL2Metric>(config) {}
+
 }  // namespace LightGBM
 
 #endif  // USE_CUDA_EXP
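Eval now allocates the conversion buffer lazily, sized num_data_ * num_class_, and only when the objective actually transforms its output; identity objectives skip both the Resize and the extra kernel launch. With the Resize early-out from cuda_utils.h above, repeated evaluations reuse one allocation. A sketch of the calling pattern, reusing the illustrative ObjectiveSketch and DeviceBufferSketch types from the earlier notes:

// Mirrors the rewritten Eval: convert only when needed, via one reused buffer.
const double* PrepareScores(const ObjectiveSketch* objective, int num_data, int num_class,
                            const double* raw_scores, DeviceBufferSketch* buffer) {
  if (objective != nullptr && objective->NeedConvertOutputCUDA()) {
    buffer->Resize(static_cast<std::size_t>(num_data) * static_cast<std::size_t>(num_class));
    return objective->ConvertOutputCUDA(num_data, raw_scores, buffer->data);
  }
  return raw_scores;  // e.g. plain L2: scores are already in output space
}

This is also why score_convert_buffer_ becomes mutable in cuda_regression_metric.hpp below: Eval is const but lazily grows the buffer.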
13 changes: 10 additions & 3 deletions src/metric/cuda/cuda_regression_metric.cu
@@ -19,16 +19,22 @@ __global__ void EvalKernel(const data_size_t num_data, const label_t* labels, co
   const data_size_t index = static_cast<data_size_t>(threadIdx.x + blockIdx.x * blockDim.x);
   double point_metric = 0.0;
   if (index < num_data) {
-    point_metric = CUDA_METRIC::MetricOnPointCUDA(labels[index], scores[index]);
+    point_metric = USE_WEIGHTS ?
+      CUDA_METRIC::MetricOnPointCUDA(labels[index], scores[index]) * weights[index] :
+      CUDA_METRIC::MetricOnPointCUDA(labels[index], scores[index]);
   }
   const double block_sum_point_metric = ShuffleReduceSum<double>(point_metric, shared_mem_buffer, NUM_DATA_PER_EVAL_THREAD);
-  reduce_block_buffer[blockIdx.x] = block_sum_point_metric;
+  if (threadIdx.x == 0) {
+    reduce_block_buffer[blockIdx.x] = block_sum_point_metric;
+  }
   if (USE_WEIGHTS) {
     double weight = 0.0;
     if (index < num_data) {
       weight = static_cast<double>(weights[index]);
       const double block_sum_weight = ShuffleReduceSum<double>(weight, shared_mem_buffer, NUM_DATA_PER_EVAL_THREAD);
-      reduce_block_buffer[blockIdx.x + blockDim.x] = block_sum_weight;
+      if (threadIdx.x == 0) {
+        reduce_block_buffer[blockIdx.x + gridDim.x] = block_sum_weight;
+      }
     }
   }
 }
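Three things change in this kernel. The point metric is multiplied by the sample weight when USE_WEIGHTS is set (the "weighting loss in cuda metric evaluation" bullet); only thread 0 of each block publishes the reduced sums, since that is the thread holding the final value after ShuffleReduceSum (the "write result only with the first thread" bullet); and the per-block weight sums move from offset blockDim.x to gridDim.x, so the buffer cleanly holds gridDim.x metric sums followed by gridDim.x weight sums even when the grid has more blocks than a block has threads. A self-contained CUDA sketch of the reduce-then-single-writer layout (BlockSum is a simplified stand-in for ShuffleReduceSum; block size must be a power of two):

__device__ double BlockSum(double v, double* smem) {
  __syncthreads();  // smem may still be read from a previous reduction
  smem[threadIdx.x] = v;
  __syncthreads();
  for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) {
    if (threadIdx.x < s) smem[threadIdx.x] += smem[threadIdx.x + s];
    __syncthreads();
  }
  return smem[0];  // every thread sees the block total
}

__global__ void WeightedEvalSketch(int num_data, const double* point_metrics,
                                   const double* weights, double* block_buffer) {
  extern __shared__ double smem[];
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  const double m = BlockSum(i < num_data ? point_metrics[i] * weights[i] : 0.0, smem);
  if (threadIdx.x == 0) block_buffer[blockIdx.x] = m;  // one writer per block
  const double w = BlockSum(i < num_data ? weights[i] : 0.0, smem);
  if (threadIdx.x == 0) block_buffer[blockIdx.x + gridDim.x] = w;  // second half starts at gridDim.x
}
// launched as WeightedEvalSketch<<<num_blocks, block_size, block_size * sizeof(double)>>>(...)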
@@ -55,6 +61,7 @@ double CUDARegressionMetricInterface<HOST_METRIC, CUDA_METRIC>::LaunchEvalKernel
 }
 
 template double CUDARegressionMetricInterface<RMSEMetric, CUDARMSEMetric>::LaunchEvalKernel(const double* score) const;
+template double CUDARegressionMetricInterface<L2Metric, CUDAL2Metric>::LaunchEvalKernel(const double* score) const;
 
 }  // namespace LightGBM
 
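The added line is what the "explicit template instantiation" bullet refers to: LaunchEvalKernel's definition lives in this .cu file, so the compiler must be told to emit the <L2Metric, CUDAL2Metric> specialization here for the host-side code in cuda_regression_metric.cpp to link against. A generic sketch of the split (file names are illustrative):

// ---- widget.h: declaration only, included everywhere ----
template <typename T> T Twice(T v);

// ---- widget.cpp: definition plus explicit instantiation per needed type ----
template <typename T> T Twice(T v) { return v + v; }
template double Twice<double>(double);  // emitted in this translation unit

// ---- main.cpp: links against that instantiation without seeing the body ----
// #include "widget.h"
int main() { return Twice(1.5) > 0.0 ? 0 : 1; }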
20 changes: 16 additions & 4 deletions src/metric/cuda/cuda_regression_metric.hpp
@@ -23,7 +23,7 @@ namespace LightGBM {
 template <typename HOST_METRIC, typename CUDA_METRIC>
 class CUDARegressionMetricInterface: public CUDAMetricInterface<HOST_METRIC> {
  public:
-  explicit CUDARegressionMetricInterface(const Config& config): CUDAMetricInterface<HOST_METRIC>(config) {}
+  explicit CUDARegressionMetricInterface(const Config& config): CUDAMetricInterface<HOST_METRIC>(config), num_class_(config.num_class) {}
 
   virtual ~CUDARegressionMetricInterface() {}
 
@@ -34,9 +34,10 @@ class CUDARegressionMetricInterface: public CUDAMetricInterface<HOST_METRIC> {
  protected:
   double LaunchEvalKernel(const double* score_convert) const;
 
-  CUDAVector<double> score_convert_buffer_;
+  mutable CUDAVector<double> score_convert_buffer_;
   CUDAVector<double> reduce_block_buffer_;
   CUDAVector<double> reduce_block_buffer_inner_;
+  const int num_class_;
 };
 
 class CUDARMSEMetric: public CUDARegressionMetricInterface<RMSEMetric, CUDARMSEMetric> {
@@ -45,8 +46,19 @@ class CUDARMSEMetric: public CUDARegressionMetricInterface<RMSEMetric, CUDARMSEM

   virtual ~CUDARMSEMetric() {}
 
-  __device__ static double MetricOnPointCUDA(label_t label, double score) {
-    return (score - static_cast<double>(label));
+  __device__ inline static double MetricOnPointCUDA(label_t label, double score) {
+    return (score - label) * (score - label);
   }
 };
 
+class CUDAL2Metric : public CUDARegressionMetricInterface<L2Metric, CUDAL2Metric> {
+ public:
+  explicit CUDAL2Metric(const Config& config);
+
+  virtual ~CUDAL2Metric() {}
+
+  __device__ inline static double MetricOnPointCUDA(label_t label, double score) {
+    return (score - label) * (score - label);
+  }
+};
+
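Two points worth noting in this hunk. First, the RMSE fix: the old point metric returned the signed residual (score - label), but both L2 and RMSE need the squared residual per point, since RMSE is computed as sqrt(sum_i w_i * (score_i - label_i)^2 / sum_i w_i) with the square root applied host-side after the reduction; that is why the two classes share the same point function. Second, MetricOnPointCUDA is a static __device__ method selected through the CUDA_METRIC template parameter, so the shared EvalKernel binds it at compile time with no virtual dispatch on the device. A host-side sketch of that compile-time dispatch:

// Compile-time metric dispatch, as EvalKernel does with its CUDA_METRIC
// parameter: the point function is resolved statically, not virtually.
struct L2PointSketch {
  static double MetricOnPoint(double label, double score) {
    return (score - label) * (score - label);
  }
};

template <typename METRIC>
double MeanPointMetric(const double* labels, const double* scores, int n) {
  double sum = 0.0;
  for (int i = 0; i < n; ++i) sum += METRIC::MetricOnPoint(labels[i], scores[i]);
  return sum / n;  // RMSE would additionally take the sqrt of this mean
}
// usage: double l2 = MeanPointMetric<L2PointSketch>(labels, scores, n);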
3 changes: 1 addition & 2 deletions src/metric/metric.cpp
@@ -19,8 +19,7 @@ Metric* Metric::CreateMetric(const std::string& type, const Config& config) {
 #ifdef USE_CUDA_EXP
   if (config.device_type == std::string("cuda_exp")) {
     if (type == std::string("l2")) {
-      Log::Warning("Metric l2 is not implemented in cuda_exp version. Fall back to evaluation on CPU.");
-      return new L2Metric(config);
+      return new CUDAL2Metric(config);
     } else if (type == std::string("rmse")) {
       return new CUDARMSEMetric(config);
     } else if (type == std::string("l1")) {
8 changes: 6 additions & 2 deletions src/objective/cuda/cuda_regression_objective.cu
@@ -70,8 +70,12 @@ __global__ void ConvertOutputCUDAKernel_Regression(const bool sqrt, const data_s

 const double* CUDARegressionL2loss::LaunchConvertOutputCUDAKernel(const data_size_t num_data, const double* input, double* output) const {
   const int num_blocks = (num_data + GET_GRADIENTS_BLOCK_SIZE_REGRESSION - 1) / GET_GRADIENTS_BLOCK_SIZE_REGRESSION;
-  ConvertOutputCUDAKernel_Regression<<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_REGRESSION>>>(sqrt_, num_data, input, output);
-  return output;
+  if (sqrt_) {
+    ConvertOutputCUDAKernel_Regression<<<num_blocks, GET_GRADIENTS_BLOCK_SIZE_REGRESSION>>>(sqrt_, num_data, input, output);
+    return output;
+  } else {
+    return input;
+  }
 }

template <bool USE_WEIGHT>
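Note: when sqrt_ is false the conversion is the identity, so returning input directly skips the kernel launch entirely. This is consistent with NeedConvertOutputCUDA() for this objective (declared in the header below), which reports sqrt_.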
4 changes: 4 additions & 0 deletions src/objective/cuda/cuda_regression_objective.hpp
@@ -50,6 +50,8 @@ class CUDARegressionL2loss : public CUDARegressionObjectiveInterface<RegressionL
   void LaunchGetGradientsKernel(const double* score, score_t* gradients, score_t* hessians) const override;
 
   const double* LaunchConvertOutputCUDAKernel(const data_size_t num_data, const double* input, double* output) const override;
+
+  bool NeedConvertOutputCUDA() const override { return sqrt_; }
 };
 
 
@@ -123,6 +125,8 @@ class CUDARegressionPoissonLoss : public CUDARegressionObjectiveInterface<Regres

   const double* LaunchConvertOutputCUDAKernel(const data_size_t num_data, const double* input, double* output) const override;
 
+  bool NeedConvertOutputCUDA() const override { return true; }
+
   double LaunchCalcInitScoreKernel(const int class_id) const override;
 
   void LaunchCheckLabelKernel() const;
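The Poisson objective answers true unconditionally because it predicts through an exponential link, so raw scores must always be transformed before metric evaluation, whereas plain L2 only needs conversion under the sqrt transform.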
1 change: 0 additions & 1 deletion src/objective/regression_objective.hpp
@@ -398,7 +398,6 @@ class RegressionFairLoss: public RegressionL2loss {
 class RegressionPoissonLoss: public RegressionL2loss {
  public:
   explicit RegressionPoissonLoss(const Config& config): RegressionL2loss(config) {
-    Log::Warning("RegressionPoissonLoss is created again");
     max_delta_step_ = static_cast<double>(config.poisson_max_delta_step);
     if (sqrt_) {
       Log::Warning("Cannot use sqrt transform in %s Regression, will auto disable it", GetName());
