@@ -572,13 +572,8 @@ void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output,
572572 }
573573}
574574
575- //
576- // Huber loss for robust 2-classes classification
577- //
578- REGISTER_LAYER (huber, HuberTwoClass);
579-
580- bool HuberTwoClass::init (const LayerMap& layerMap,
581- const ParameterMap& parameterMap) {
575+ bool HuberCost::init (const LayerMap& layerMap,
576+ const ParameterMap& parameterMap) {
582577 CostLayer::init (layerMap, parameterMap);
583578 if (useGpu_) {
584579 tmpCpuInput_.reserve (inputLayers_.size ());
@@ -589,69 +584,131 @@ bool HuberTwoClass::init(const LayerMap& layerMap,
589584 return true ;
590585}
591586
592- void HuberTwoClass ::forwardImp (Matrix& output, Argument& label, Matrix& cost) {
587+ void HuberCost ::forwardImp (Matrix& output, Argument& label, Matrix& cost) {
593588 if (useGpu_) {
594589 for (size_t i = 0 ; i < inputLayers_.size (); i++) {
595590 tmpCpuInput_[i].resizeAndCopyFrom (
596591 getInput (i), false , HPPL_STREAM_DEFAULT);
597592 }
598593 hl_stream_synchronize (HPPL_STREAM_DEFAULT);
599594 }
600- forwardImpIn (output, label, cost);
601595}
602596
603- void HuberTwoClass::forwardImpIn (Matrix& output,
604- Argument& label,
605- Matrix& target) {
597+ //
598+ // Huber loss for robust regression.
599+ //
600+ REGISTER_LAYER (huber_regression, HuberRegressionLoss);
601+
602+ bool HuberRegressionLoss::init (const LayerMap& layerMap,
603+ const ParameterMap& parameterMap) {
604+ HuberCost::init (layerMap, parameterMap);
605+ delta_ = config_.delta ();
606+ return true ;
607+ }
608+
609+ void HuberRegressionLoss::forwardImp (Matrix& output,
610+ Argument& label,
611+ Matrix& target) {
612+ HuberCost::forwardImp (output, label, target);
613+ size_t numSamples = target.getHeight ();
614+ size_t dim = output.getWidth ();
615+ CHECK (label.value );
616+ CHECK_EQ ((*label.value ).getHeight (), numSamples);
617+ CHECK_EQ (output.getHeight (), numSamples);
618+ CHECK_EQ (dim, (*label.value ).getWidth ());
619+ CHECK_EQ (target.getWidth (), (size_t )1 );
620+
621+ real* out = useGpu_ ? tmpCpuInput_[0 ].value ->getData () : output.getData ();
622+ real* lbl =
623+ useGpu_ ? tmpCpuInput_[1 ].value ->getData () : (*label.value ).getData ();
624+ std::vector<real> cost (numSamples, 0 );
625+ for (size_t i = 0 ; i < numSamples; ++i) {
626+ for (size_t j = 0 ; j < dim; ++j) {
627+ int index = i * dim + j;
628+ real a = std::abs (lbl[index] - out[index]);
629+ if (a <= delta_)
630+ cost[i] += a * a / 2 ;
631+ else
632+ cost[i] += delta_ * (a - delta_ / 2 );
633+ }
634+ }
635+ target.copyFrom (cost.data (), numSamples);
636+ }
637+
638+ void HuberRegressionLoss::backwardImp (Matrix& output,
639+ Argument& label,
640+ Matrix& outputG) {
641+ size_t numSamples = output.getHeight ();
642+ size_t dim = output.getWidth ();
643+ real* out = useGpu_ ? tmpCpuInput_[0 ].value ->getData () : output.getData ();
644+ real* lbl =
645+ useGpu_ ? tmpCpuInput_[1 ].value ->getData () : (*label.value ).getData ();
646+ real* grad = useGpu_ ? tmpCpuInput_[0 ].grad ->getData () : outputG.getData ();
647+ for (size_t i = 0 ; i < numSamples; ++i) {
648+ for (size_t j = 0 ; j < dim; ++j) {
649+ int index = i * dim + j;
650+ real a = lbl[index] - out[index];
651+ if (std::abs (a) <= delta_)
652+ grad[index] += -a;
653+ else
654+ grad[index] += a > 0 ? -delta_ : delta_;
655+ }
656+ }
657+ if (useGpu_) outputG.copyFrom (grad, numSamples * dim);
658+ }
659+
660+ //
661+ // Huber loss for robust 2-classes classification
662+ //
663+ REGISTER_LAYER (huber_classification, HuberTwoClassification);
664+
665+ bool HuberTwoClassification::init (const LayerMap& layerMap,
666+ const ParameterMap& parameterMap) {
667+ return HuberCost::init (layerMap, parameterMap);
668+ }
669+
670+ void HuberTwoClassification::forwardImp (Matrix& output,
671+ Argument& label,
672+ Matrix& target) {
673+ HuberCost::forwardImp (output, label, target);
606674 size_t numSamples = target.getHeight ();
675+ CHECK (label.ids );
607676 CHECK_EQ ((*label.ids ).getSize (), numSamples);
608677 CHECK_EQ (output.getHeight (), numSamples);
609678 CHECK_EQ (output.getWidth (), (size_t )1 );
610679 CHECK_EQ (target.getWidth (), (size_t )1 );
611680
612681 real* out = useGpu_ ? tmpCpuInput_[0 ].value ->getData () : output.getData ();
613682 int * lbl = useGpu_ ? tmpCpuInput_[1 ].ids ->getData () : (*label.ids ).getData ();
614- std::vector<real> cost (numSamples);
683+ std::vector<real> cost (numSamples, 0 );
615684 for (size_t i = 0 ; i < numSamples; ++i) {
616685 int y = 2 * lbl[i] - 1 ;
617- if (out[i] * y < -1 )
618- cost[i] = -4 * out[i] * y;
619- else if (out[i] * y < 1 )
620- cost[i] = (1 - out[i] * y) * (1 - out[i] * y);
621- else
622- cost[i] = 0 ;
686+ real a = out[i] * y;
687+ if (a < -1 )
688+ cost[i] = -4 * a;
689+ else if (a < 1 )
690+ cost[i] = (1 - a) * (1 - a);
623691 }
624692 target.copyFrom (cost.data (), numSamples);
625693}
626694
627- void HuberTwoClass::backwardImp (Matrix& outputValue,
628- Argument& label,
629- Matrix& outputGrad) {
630- if (useGpu_) {
631- backwardImpIn (
632- *tmpCpuInput_[0 ].value , tmpCpuInput_[1 ], *tmpCpuInput_[0 ].grad );
633- outputGrad.copyFrom (*tmpCpuInput_[0 ].grad );
634- } else {
635- backwardImpIn (outputValue, label, outputGrad);
636- }
637- }
638-
639- void HuberTwoClass::backwardImpIn (Matrix& output,
640- Argument& label,
641- Matrix& outputG) {
695+ void HuberTwoClassification::backwardImp (Matrix& output,
696+ Argument& label,
697+ Matrix& outputG) {
642698 size_t numSamples = output.getHeight ();
643- real* out = output.getData ();
644- real* grad = outputG .getData ();
645- int * lbl = (*label. ids ) .getData ();
699+ real* out = useGpu_ ? tmpCpuInput_[ 0 ]. value -> getData () : output.getData ();
700+ int * lbl = useGpu_ ? tmpCpuInput_[ 1 ]. ids -> getData () : (*label. ids ) .getData ();
701+ real* grad = useGpu_ ? tmpCpuInput_[ 0 ]. grad -> getData () : outputG .getData ();
646702 for (size_t i = 0 ; i < numSamples; ++i) {
647703 int y = 2 * lbl[i] - 1 ;
648- if (y * out[i] < -1 )
704+ real a = out[i] * y;
705+ if (a < -1 )
649706 grad[i] += -4 * y;
650- else if (y * out[i] < 1 )
651- grad[i] += -2 * (1 - y * out[i] ) * y;
707+ else if (a < 1 )
708+ grad[i] += -2 * (1 - a ) * y;
652709 }
710+ if (useGpu_) outputG.copyFrom (grad, numSamples);
653711}
654-
655712/* *
 656713 * This cost layer computes the sum of its input as loss.
657714 * \f[
0 commit comments