// UnfoldBackward.h (from a fork of pytorch/pytorch)
#pragma once
#include <ATen/core/Tensor.h>
#include <ATen/Dispatch.h>
#include <ATen/native/DispatchStub.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/ReduceOpsUtils.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#else
#include <ATen/ops/arange.h>
#endif
namespace at { namespace native {
// Function-pointer type for an unfold_backward kernel.
// The kernel returns nothing; `grad_in` is the only argument taken by
// non-const reference, so presumably the kernel writes its result there
// (NOTE(review): confirm against the kernel implementations).
// `dim`, `size` and `step` are passed through as plain int64_t values.
using unfold_backward_fn = void (*)(
Tensor& grad_in,
const Tensor& grad,
int64_t dim,
int64_t size,
int64_t step
);
// Declares the dispatch stub `unfold_backward_stub` whose kernels have the
// `unfold_backward_fn` signature above.
// NOTE(review): the matching definition/registrations are not in this header.
DECLARE_DISPATCH(unfold_backward_fn, unfold_backward_stub);
namespace {
// Note on naming: it is unconventional.
// grad_in does not mean that it is a gradient with respect to the input;
// grad_in/grad_out are just the input/output of the unfold_backward kernel.
// Builds the TensorIterator for the variant of unfold_backward that iterates
// over the elements of grad_out.
//
// Operands, in the order they are registered:
//   output  - grad_out, re-viewed so that its size along `dim` is
//             min(size of grad_out along dim,
//                 (size of grad_in along dim - 1) * step + size);
//             its strides are left untouched.
//   input 0 - grad_in, re-viewed with size 1 / stride 0 along `dim` and with
//             its trailing dimension dropped.
//   input 1 - an int64 arange [0, iteration extent) placed along `dim`
//             (size 1 / stride 0 in every other dimension).
//
// Memory-overlap checks, same-dtype checks and output resizing are disabled
// in the iterator configuration.
static C10_UNUSED TensorIterator _make_unfold_backward_iter_over_grad_out(
    Tensor& grad_out,
    const Tensor& grad_in,
    int64_t dim,
    int64_t size,
    int64_t step
) {
  dim = maybe_wrap_dim(dim, grad_out.dim());

  // The last dim of grad_in stores the folds. The number of elements visited
  // along `dim` is capped both by grad_out itself and by the span derived
  // from grad_in's size along `dim`, `step` and `size`.
  const auto out_extent = ensure_nonempty_size(grad_out, dim);
  const auto in_extent = ensure_nonempty_size(grad_in, dim);
  const auto iter_extent = std::min(out_extent, (in_extent - 1) * step + size);

  // ---- output operand: grad_out narrowed to iter_extent along `dim` ----
  auto out_sizes = ensure_nonempty_vec(grad_out.sizes().vec());
  auto out_strides = ensure_nonempty_vec(grad_out.strides().vec());
  out_sizes[dim] = iter_extent;
  auto out_operand = grad_out.as_strided(out_sizes, out_strides);

  // ---- input operand: grad_in ----
  // `dim` is indexed manually inside the kernel, so the iterator sees it as
  // a size-1, stride-0 dimension; the trailing entry of both vectors is
  // removed as well.
  auto in_sizes = ensure_nonempty_vec(grad_in.sizes().vec());
  auto in_strides = ensure_nonempty_vec(grad_in.strides().vec());
  in_sizes[dim] = 1;
  in_strides[dim] = 0;
  in_sizes.pop_back();
  in_strides.pop_back();
  auto in_operand = grad_in.squeeze(-1).as_strided(in_sizes, in_strides);

  // ---- index operand ----
  // While iterating over grad_out[i_1, ..., i_dim, ..., i_n] the kernel has
  // to know i_dim. It is supplied by an arange that varies only along `dim`;
  // all other dimensions have size 1 and are left to TensorIterator to
  // broadcast against grad_out's sizes.
  const auto out_ndim = ensure_nonempty_dim(grad_out.dim());
  std::vector<int64_t> index_sizes(out_ndim, 1);
  std::vector<int64_t> index_strides(out_ndim, 0);
  index_sizes[dim] = iter_extent;
  index_strides[dim] = 1;
  auto index_base = at::arange(
      0, iter_extent, grad_in.options().dtype(at::kLong));
  auto index_operand = index_base.as_strided(index_sizes, index_strides);

  return TensorIteratorConfig()
      .set_check_mem_overlap(false)
      .check_all_same_dtype(false)
      .resize_outputs(false)
      .add_owned_output(out_operand)
      .add_owned_input(in_operand)
      .add_owned_input(index_operand)
      .build();
}
// Builds the TensorIterator for the variant of unfold_backward that iterates
// over the elements of grad_in. The `size` and `step` arguments are accepted
// but not used here.
//
// Operands, in the order they are registered:
//   output  - grad_out with a trailing dimension appended, then re-viewed so
//             that `dim` and the trailing dimension take grad_in's sizes with
//             stride 0.
//   input 0 - grad_in, unchanged.
//   input 1 - an int64 arange over grad_in's size along `dim`, placed along
//             `dim`.
//   input 2 - an int64 arange over grad_in's size along its last dimension,
//             placed along that last dimension.
//
// Memory-overlap checks, same-dtype checks and output resizing are disabled
// in the iterator configuration.
static C10_UNUSED TensorIterator _make_unfold_backward_iter_over_grad_in(
    Tensor& grad_out,
    const Tensor& grad_in,
    int64_t dim,
    int64_t /*size*/,
    int64_t /*step*/
) {
  dim = maybe_wrap_dim(dim, grad_out.dim());

  // The last dim of grad_in stores the folds.
  const auto fold_dim = maybe_wrap_dim(-1, grad_in.dim());
  const auto in_ndim = ensure_nonempty_dim(grad_in.dim());
  const auto in_dim_extent = ensure_nonempty_size(grad_in, dim);
  const auto in_fold_extent = ensure_nonempty_size(grad_in, fold_dim);

  // ---- output operand ----
  // Append a trailing dimension to grad_out, then give `dim` and the fold
  // dimension grad_in's extents with stride 0, since both are indexed
  // through the index operands below rather than by the iterator.
  auto out_operand = grad_out.unsqueeze(-1);
  auto out_sizes = ensure_nonempty_vec(out_operand.sizes().vec());
  auto out_strides = ensure_nonempty_vec(out_operand.strides().vec());
  out_sizes[dim] = in_dim_extent;
  out_strides[dim] = 0;
  out_sizes[fold_dim] = in_fold_extent;
  out_strides[fold_dim] = 0;
  out_operand = out_operand.as_strided(out_sizes, out_strides);

  // ---- index operands ----
  // For each element grad_out[i_1, ..., i_dim, ..., i_last_dim] the kernel
  // needs i_dim and i_last_dim. Each is provided by an arange that varies
  // along exactly one axis and has size 1 / stride 0 everywhere else.
  const auto index_options = grad_in.options().dtype(at::kLong);
  const auto make_index_along = [&](int64_t axis, int64_t extent) {
    auto base = at::arange(0, extent, index_options);
    std::vector<int64_t> sizes(in_ndim, 1);
    std::vector<int64_t> strides(in_ndim, 0);
    sizes[axis] = extent;
    strides[axis] = 1;
    return base.as_strided(sizes, strides);
  };
  auto dim_index_operand = make_index_along(dim, in_dim_extent);
  auto fold_index_operand = make_index_along(fold_dim, in_fold_extent);

  return TensorIteratorConfig()
      .set_check_mem_overlap(false)
      .check_all_same_dtype(false)
      .resize_outputs(false)
      .add_owned_output(out_operand)
      .add_owned_input(grad_in)
      .add_owned_input(dim_index_operand)
      .add_owned_input(fold_index_operand)
      .build();
}
}
}} // namespace at::native