Skip to content

Commit 47eb869

Browse files
authored
Merge pull request #3571 from luotao1/huber_loss
refine Huber loss, add huber_regression_cost
2 parents 58419e7 + b709af6 commit 47eb869

File tree

10 files changed

+289
-80
lines changed

10 files changed

+289
-80
lines changed

doc/api/v2/config/layer.rst

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -419,9 +419,14 @@ multi_binary_label_cross_entropy_cost
419419
.. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost
420420
:noindex:
421421

422-
huber_cost
423-
----------
424-
.. autoclass:: paddle.v2.layer.huber_cost
422+
huber_regression_cost
423+
-------------------------
424+
.. autoclass:: paddle.v2.layer.huber_regression_cost
425+
:noindex:
426+
427+
huber_classification_cost
428+
-------------------------
429+
.. autoclass:: paddle.v2.layer.huber_classification_cost
425430
:noindex:
426431

427432
lambda_cost

paddle/gserver/layers/CostLayer.cpp

Lines changed: 98 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -572,13 +572,8 @@ void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output,
572572
}
573573
}
574574

575-
//
576-
// Huber loss for robust 2-classes classification
577-
//
578-
REGISTER_LAYER(huber, HuberTwoClass);
579-
580-
bool HuberTwoClass::init(const LayerMap& layerMap,
581-
const ParameterMap& parameterMap) {
575+
bool HuberCost::init(const LayerMap& layerMap,
576+
const ParameterMap& parameterMap) {
582577
CostLayer::init(layerMap, parameterMap);
583578
if (useGpu_) {
584579
tmpCpuInput_.reserve(inputLayers_.size());
@@ -589,69 +584,131 @@ bool HuberTwoClass::init(const LayerMap& layerMap,
589584
return true;
590585
}
591586

592-
void HuberTwoClass::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
587+
void HuberCost::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
593588
if (useGpu_) {
594589
for (size_t i = 0; i < inputLayers_.size(); i++) {
595590
tmpCpuInput_[i].resizeAndCopyFrom(
596591
getInput(i), false, HPPL_STREAM_DEFAULT);
597592
}
598593
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
599594
}
600-
forwardImpIn(output, label, cost);
601595
}
602596

603-
void HuberTwoClass::forwardImpIn(Matrix& output,
604-
Argument& label,
605-
Matrix& target) {
597+
//
598+
// Huber loss for robust regression.
599+
//
600+
REGISTER_LAYER(huber_regression, HuberRegressionLoss);
601+
602+
bool HuberRegressionLoss::init(const LayerMap& layerMap,
603+
const ParameterMap& parameterMap) {
604+
HuberCost::init(layerMap, parameterMap);
605+
delta_ = config_.delta();
606+
return true;
607+
}
608+
609+
void HuberRegressionLoss::forwardImp(Matrix& output,
610+
Argument& label,
611+
Matrix& target) {
612+
HuberCost::forwardImp(output, label, target);
613+
size_t numSamples = target.getHeight();
614+
size_t dim = output.getWidth();
615+
CHECK(label.value);
616+
CHECK_EQ((*label.value).getHeight(), numSamples);
617+
CHECK_EQ(output.getHeight(), numSamples);
618+
CHECK_EQ(dim, (*label.value).getWidth());
619+
CHECK_EQ(target.getWidth(), (size_t)1);
620+
621+
real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
622+
real* lbl =
623+
useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData();
624+
std::vector<real> cost(numSamples, 0);
625+
for (size_t i = 0; i < numSamples; ++i) {
626+
for (size_t j = 0; j < dim; ++j) {
627+
int index = i * dim + j;
628+
real a = std::abs(lbl[index] - out[index]);
629+
if (a <= delta_)
630+
cost[i] += a * a / 2;
631+
else
632+
cost[i] += delta_ * (a - delta_ / 2);
633+
}
634+
}
635+
target.copyFrom(cost.data(), numSamples);
636+
}
637+
638+
void HuberRegressionLoss::backwardImp(Matrix& output,
639+
Argument& label,
640+
Matrix& outputG) {
641+
size_t numSamples = output.getHeight();
642+
size_t dim = output.getWidth();
643+
real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
644+
real* lbl =
645+
useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData();
646+
real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData();
647+
for (size_t i = 0; i < numSamples; ++i) {
648+
for (size_t j = 0; j < dim; ++j) {
649+
int index = i * dim + j;
650+
real a = lbl[index] - out[index];
651+
if (std::abs(a) <= delta_)
652+
grad[index] += -a;
653+
else
654+
grad[index] += a > 0 ? -delta_ : delta_;
655+
}
656+
}
657+
if (useGpu_) outputG.copyFrom(grad, numSamples * dim);
658+
}
659+
660+
//
661+
// Huber loss for robust 2-classes classification
662+
//
663+
REGISTER_LAYER(huber_classification, HuberTwoClassification);
664+
665+
bool HuberTwoClassification::init(const LayerMap& layerMap,
666+
const ParameterMap& parameterMap) {
667+
return HuberCost::init(layerMap, parameterMap);
668+
}
669+
670+
void HuberTwoClassification::forwardImp(Matrix& output,
671+
Argument& label,
672+
Matrix& target) {
673+
HuberCost::forwardImp(output, label, target);
606674
size_t numSamples = target.getHeight();
675+
CHECK(label.ids);
607676
CHECK_EQ((*label.ids).getSize(), numSamples);
608677
CHECK_EQ(output.getHeight(), numSamples);
609678
CHECK_EQ(output.getWidth(), (size_t)1);
610679
CHECK_EQ(target.getWidth(), (size_t)1);
611680

612681
real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
613682
int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData();
614-
std::vector<real> cost(numSamples);
683+
std::vector<real> cost(numSamples, 0);
615684
for (size_t i = 0; i < numSamples; ++i) {
616685
int y = 2 * lbl[i] - 1;
617-
if (out[i] * y < -1)
618-
cost[i] = -4 * out[i] * y;
619-
else if (out[i] * y < 1)
620-
cost[i] = (1 - out[i] * y) * (1 - out[i] * y);
621-
else
622-
cost[i] = 0;
686+
real a = out[i] * y;
687+
if (a < -1)
688+
cost[i] = -4 * a;
689+
else if (a < 1)
690+
cost[i] = (1 - a) * (1 - a);
623691
}
624692
target.copyFrom(cost.data(), numSamples);
625693
}
626694

627-
void HuberTwoClass::backwardImp(Matrix& outputValue,
628-
Argument& label,
629-
Matrix& outputGrad) {
630-
if (useGpu_) {
631-
backwardImpIn(
632-
*tmpCpuInput_[0].value, tmpCpuInput_[1], *tmpCpuInput_[0].grad);
633-
outputGrad.copyFrom(*tmpCpuInput_[0].grad);
634-
} else {
635-
backwardImpIn(outputValue, label, outputGrad);
636-
}
637-
}
638-
639-
void HuberTwoClass::backwardImpIn(Matrix& output,
640-
Argument& label,
641-
Matrix& outputG) {
695+
void HuberTwoClassification::backwardImp(Matrix& output,
696+
Argument& label,
697+
Matrix& outputG) {
642698
size_t numSamples = output.getHeight();
643-
real* out = output.getData();
644-
real* grad = outputG.getData();
645-
int* lbl = (*label.ids).getData();
699+
real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
700+
int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData();
701+
real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData();
646702
for (size_t i = 0; i < numSamples; ++i) {
647703
int y = 2 * lbl[i] - 1;
648-
if (y * out[i] < -1)
704+
real a = out[i] * y;
705+
if (a < -1)
649706
grad[i] += -4 * y;
650-
else if (y * out[i] < 1)
651-
grad[i] += -2 * (1 - y * out[i]) * y;
707+
else if (a < 1)
708+
grad[i] += -2 * (1 - a) * y;
652709
}
710+
if (useGpu_) outputG.copyFrom(grad, numSamples);
653711
}
654-
655712
/**
656713
* This cost layer compute the sum of its input as loss.
657714
* \f[

paddle/gserver/layers/CostLayer.h

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -304,37 +304,68 @@ class MultiBinaryLabelCrossEntropy : public CostLayer {
304304
Matrix& outputGrad) override;
305305
};
306306

307-
/**
308-
* Huber loss for robust 2-classes classification.
309-
*
310-
* For label={0, 1}, let y=2*label-1. Given output f, the loss is:
311-
* \f[
312-
* Loss =
313-
* \left\{\begin{matrix}
314-
* 4 * y * f & \textit{if} \ \ y* f < -1 \\
315-
* (1 - y * f)^2 & \textit{if} \ \ -1 < y * f < 1 \\
316-
* 0 & \textit{otherwise}
317-
* \end{matrix}\right.
318-
* \f]
307+
/*
308+
* A base layer for HuberRegressionLoss and HuberTwoClassification.
319309
*/
320-
class HuberTwoClass : public CostLayer {
310+
class HuberCost : public CostLayer {
311+
public:
321312
std::vector<Argument> tmpCpuInput_;
322313

323-
public:
324-
explicit HuberTwoClass(const LayerConfig& config) : CostLayer(config) {}
314+
explicit HuberCost(const LayerConfig& config) : CostLayer(config) {}
325315

326316
bool init(const LayerMap& layerMap,
327317
const ParameterMap& parameterMap) override;
328318

329319
void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
330320

331-
void forwardImpIn(Matrix& output, Argument& label, Matrix& cost);
321+
void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {}
322+
};
323+
324+
/**
325+
* Huber loss for robust regression.
326+
*
327+
* Given output f(x), label y and delta, the loss is:
328+
 * Loss = 0.5 * (y - f)^2, if abs(y - f) <= delta \\
329+
* Loss = delta * abs(y - f) - 0.5 * delta^2, otherwise
330+
*/
331+
class HuberRegressionLoss : public HuberCost {
332+
public:
333+
explicit HuberRegressionLoss(const LayerConfig& config) : HuberCost(config) {}
334+
335+
bool init(const LayerMap& layerMap,
336+
const ParameterMap& parameterMap) override;
337+
338+
void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
332339

333340
void backwardImp(Matrix& outputValue,
334341
Argument& label,
335342
Matrix& outputGrad) override;
336343

337-
void backwardImpIn(Matrix& outputValue, Argument& label, Matrix& outputGrad);
344+
protected:
345+
real delta_;
346+
};
347+
348+
/**
349+
* Huber loss for robust 2-classes classification.
350+
*
351+
* For label={0, 1}, let y=2*label-1. Given output f(x), the loss is:
352+
 * Loss = -4 * y * f, if y * f < -1 \\
353+
* Loss = (1 - y * f)^2, if -1 < y * f < 1 \\
354+
* Loss = 0, otherwise
355+
*/
356+
class HuberTwoClassification : public HuberCost {
357+
public:
358+
explicit HuberTwoClassification(const LayerConfig& config)
359+
: HuberCost(config) {}
360+
361+
bool init(const LayerMap& layerMap,
362+
const ParameterMap& parameterMap) override;
363+
364+
void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
365+
366+
void backwardImp(Matrix& outputValue,
367+
Argument& label,
368+
Matrix& outputGrad) override;
338369
};
339370

340371
typedef std::shared_ptr<CostLayer> CostLayerPtr;

paddle/gserver/tests/test_LayerGrad.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -850,9 +850,27 @@ TEST(Layer, square_error_weighted) {
850850
}
851851
}
852852

853+
TEST(Layer, huber_regression_loss) {
854+
TestConfig config;
855+
config.layerConfig.set_type("huber_regression");
856+
config.biasSize = 0;
857+
858+
config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
859+
config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0});
860+
config.layerConfig.add_inputs();
861+
config.layerConfig.add_inputs();
862+
863+
for (auto useGpu : {false, true}) {
864+
for (auto delta : {1, 3, 5}) {
865+
config.layerConfig.set_delta(delta);
866+
testLayerGrad(config, "huber_regression", 100, /* trans */ false, useGpu);
867+
}
868+
}
869+
}
870+
853871
TEST(Layer, huber_two_class) {
854872
TestConfig config;
855-
config.layerConfig.set_type("huber");
873+
config.layerConfig.set_type("huber_classification");
856874
config.biasSize = 0;
857875

858876
config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
@@ -861,7 +879,7 @@ TEST(Layer, huber_two_class) {
861879
config.layerConfig.add_inputs();
862880

863881
for (auto useGpu : {false, true}) {
864-
testLayerGrad(config, "huber", 100, /* trans */ false, useGpu);
882+
testLayerGrad(config, "huber_two_class", 100, /* trans */ false, useGpu);
865883
}
866884
}
867885

proto/ModelConfig.proto

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,9 @@ message LayerConfig {
499499
optional int32 axis = 54 [ default = 2 ];
500500
repeated uint32 offset = 55;
501501
repeated uint32 shape = 56;
502+
503+
// for HuberRegressionLoss
504+
optional double delta = 57 [ default = 1.0 ];
502505
}
503506

504507
message EvaluatorConfig {

python/paddle/trainer/config_parser.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2274,7 +2274,7 @@ def init(cls, name, inputs, device=None, coeff=1.):
22742274
define_cost('SumOfSquaresCostLayer', 'square_error')
22752275
define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy')
22762276
define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
2277-
define_cost('HuberTwoClass', 'huber')
2277+
define_cost('HuberTwoClassification', 'huber_classification')
22782278
define_cost('SumCost', 'sum_cost')
22792279
define_cost('SmoothL1Cost', 'smooth_l1')
22802280

@@ -2336,6 +2336,17 @@ def __init__(self, name, inputs, NDCG_num=5, max_sort_size=-1, device=None):
23362336
self.config.max_sort_size = max_sort_size
23372337

23382338

2339+
@config_layer('huber_regression')
2340+
class HuberRegressionLoss(LayerBase):
2341+
def __init__(self, name, inputs, delta=1., coeff=1., device=None):
2342+
super(HuberRegressionLoss, self).__init__(
2343+
name, 'huber_regression', 1, inputs=inputs, device=device)
2344+
config_assert(
2345+
len(self.inputs) == 2, 'HuberRegression must have 2 inputs')
2346+
self.config.delta = delta
2347+
self.config.coeff = coeff
2348+
2349+
23392350
@config_layer('nce')
23402351
class NCELayer(LayerBase):
23412352
def __init__(self,

0 commit comments

Comments
 (0)