kl_loss_layer.cu
#include <vector>
#include <string>

#include "caffe/layers/kl_loss_layer.hpp"

namespace caffe {
template <typename Dtype>
__global__ void KlForward(const int n, const Dtype* in, const Dtype* alpha, Dtype* out) {
  // Smooth-L1-style error attenuated by the predicted uncertainty alpha,
  // with the transition point th hard-coded to 1 below:
  //   f(x) = e^(-alpha) * (|x| - 1/2 * th) + alpha/2   if |x| > th
  //        = e^(-alpha) * x^2 * 1/(2*th)   + alpha/2   if |x| <= th
  CUDA_KERNEL_LOOP(index, n) {
    Dtype x = in[index];
    Dtype abs_x = abs(x);
    Dtype a = alpha[index];
    if (abs_x > 1) {
      out[index] = exp(-a) * (abs_x - 0.5) + a * 0.5;
    } else {
      out[index] = exp(-a) * x * x * 0.5 + a * 0.5;
    }
  }
}
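// One way to read the formula above (an interpretation, not stated in this
// file): if alpha = log(sigma^2) for a Gaussian placed around the prediction,
// the per-element negative log-likelihood of the target is
//   x^2 / (2 * sigma^2) + log(sigma^2) / 2 + const
//     = e^(-alpha) * x^2 / 2 + alpha / 2 + const,
// which is the |x| <= th branch; the |x| > th branch swaps the quadratic term
// for a linear one, as in smooth L1, so the gradient stays bounded for large
// residuals.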
template <typename Dtype>
void KlLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  // diff_ = xe - xg (prediction minus ground truth).
  caffe_gpu_sub(count, bottom[0]->gpu_data(), bottom[2]->gpu_data(),
      diff_.mutable_gpu_data());
  KlForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, diff_.gpu_data(), bottom[1]->gpu_data(), error_.mutable_gpu_data());
  CUDA_POST_KERNEL_CHECK;
  normalize_divider_ = bottom[0]->count();
  if (has_weights_) {
    // Mask the per-element error and normalize by the sum of the weights.
    caffe_gpu_mul(count, bottom[3]->gpu_data(), error_.gpu_data(),
        error_.mutable_gpu_data());
    caffe_gpu_asum(count, bottom[3]->gpu_data(), &normalize_divider_);
  }
  Dtype loss;
  caffe_gpu_asum(count, error_.gpu_data(), &loss);
  normalize_divider_ += 0.00001f;  // guard against division by zero
  top[0]->mutable_cpu_data()[0] = loss / normalize_divider_;
}
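// Blob layout assumed by the indexing above: bottom[0] holds the predicted
// values xe, bottom[1] the predicted uncertainty alpha, bottom[2] the
// ground-truth targets xg, and, when has_weights_ is set, bottom[3] holds
// per-element weights.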
template <typename Dtype>
__global__ void KlBackward(const int n, const Dtype* in1, const Dtype* in2,
    const Dtype* in3, const Dtype* in4, Dtype* out1, Dtype* out2) {
  // f'(xe)    =  e^(-alpha) * (xe - xg)                if |xg - xe| <= 1
  //           = -e^(-alpha)                            if |xg - xe| > 1 and xg > xe
  //           =  e^(-alpha)                            if |xg - xe| > 1 and xg < xe
  //
  // f'(alpha) = -(xg - xe)^2 * 0.5 * e^(-alpha) + 0.5  if |xg - xe| <= 1
  //           = -(|xg - xe| - 0.5) * e^(-alpha) + 0.5  if |xg - xe| > 1
  CUDA_KERNEL_LOOP(index, n) {
    Dtype d = in1[index];  // xe - xg
    Dtype xe = in2[index];
    Dtype xg = in3[index];
    Dtype alpha = in4[index];
    Dtype abs_d = abs(d);
    Dtype ea = exp(-alpha);
    if (abs_d <= 1) {
      out1[index] = ea * d;
      out2[index] = -d * d * 0.5 * ea + 0.5;
    } else {
      out1[index] = (xg > xe) ? -ea : ea;
      out2[index] = -(abs_d - 0.5) * ea + 0.5;
    }
  }
}
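// A worked differentiation of the forward loss, using d = xe - xg, confirms
// the expressions above:
//   quadratic branch: d/dxe    [ e^(-alpha) * d^2 / 2 + alpha / 2 ] =  e^(-alpha) * d
//                     d/dalpha [ e^(-alpha) * d^2 / 2 + alpha / 2 ] = -e^(-alpha) * d^2 / 2 + 1/2
//   linear branch:    d/dxe    [ e^(-alpha) * (|d| - 1/2) + alpha / 2 ] =  e^(-alpha) * sign(d)
//                     d/dalpha [ e^(-alpha) * (|d| - 1/2) + alpha / 2 ] = -e^(-alpha) * (|d| - 1/2) + 1/2
// Since d = xe - xg, sign(d) is -1 exactly when xg > xe, which matches the
// branch on (xg > xe) in the kernel.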
template <typename Dtype>
void KlLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0] && propagate_down[1]) {
    int count = diff_.count();
    Dtype* bottom_diff1 = bottom[0]->mutable_gpu_diff();  // gradient w.r.t. xe
    Dtype* bottom_diff2 = bottom[1]->mutable_gpu_diff();  // gradient w.r.t. alpha
    KlBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, diff_.gpu_data(), bottom[0]->gpu_data(), bottom[2]->gpu_data(),
        bottom[1]->gpu_data(), bottom_diff1, bottom_diff2);
    CUDA_POST_KERNEL_CHECK;
    if (has_weights_) {
      const Dtype* label_weight = bottom[3]->gpu_data();
      caffe_gpu_mul(count, label_weight, bottom[0]->gpu_diff(), bottom_diff1);
      caffe_gpu_mul(count, label_weight, bottom[1]->gpu_diff(), bottom_diff2);
    }
    // Scale by the incoming gradient and the same normalizer as the forward pass.
    const Dtype loss_weight = top[0]->cpu_diff()[0] / normalize_divider_;
    caffe_gpu_scal(count, loss_weight, bottom_diff1);
    caffe_gpu_scal(count, loss_weight, bottom_diff2);
  }
}
INSTANTIATE_LAYER_GPU_FUNCS(KlLossLayer);
} // namespace caffe
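
// ---------------------------------------------------------------------------
// A minimal host-side sketch of the per-element loss computed in KlForward,
// handy for spot-checking kernel output on a few values. The helper name
// kl_loss_reference is illustrative only (not part of Caffe or this layer's
// header), and the fixed threshold of 1 simply mirrors the kernel above.
// ---------------------------------------------------------------------------
#include <cmath>

inline double kl_loss_reference(double xe, double xg, double alpha) {
  const double d = xe - xg;            // same sign convention as diff_
  const double abs_d = std::abs(d);
  if (abs_d > 1.0) {
    return std::exp(-alpha) * (abs_d - 0.5) + 0.5 * alpha;  // linear branch
  }
  return std::exp(-alpha) * d * d * 0.5 + 0.5 * alpha;      // quadratic branch
}

// Example: kl_loss_reference(1.3, 0.2, 0.5) takes the quadratic branch is
// never reached here because |1.3 - 0.2| > 1, so the linear branch applies.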