forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathim2col.h
96 lines (82 loc) · 2.78 KB
/
im2col.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#pragma once
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include <ATen/Utils.h>
#include <c10/util/irange.h>
#include <algorithm>
namespace at {
namespace native {
// Unfolds a multi-channel image into a "column" buffer.
//
// The column buffer has channels * kernel_h * kernel_w rows, one per
// (channel, kernel row, kernel column) triple, and output_height * output_width
// entries per row. Each entry receives the image element that the given kernel
// tap touches at the given output position, or zero when that tap falls
// outside [0, height) x [0, width).
//
// Indexing performed by this function:
//   data_im  is read at    (c * height + y) * width + x
//   data_col is written at (row * output_height + oy) * output_width + ox
//
// Parameters:
//   data_im                      source image buffer (read only)
//   channels, height, width      extents used to index data_im
//   output_height, output_width  number of output positions per axis
//   kernel_h, kernel_w           kernel extent
//   pad_h, pad_w                 offset subtracted from the input coordinate
//   stride_h, stride_w           input step between adjacent output positions
//   dilation_h, dilation_w       input step between adjacent kernel taps
//   data_col                     destination buffer; every entry addressed
//                                above is overwritten
//
// No argument is validated here. NOTE(review): callers are presumably expected
// to pass output_height/output_width consistent with the other geometry
// parameters and buffers large enough for the indexing above - confirm at the
// call sites.
template <typename T>
static void im2col(
    const T* data_im,
    const int64_t channels,
    const int64_t height,
    const int64_t width,
    const int64_t output_height,
    const int64_t output_width,
    const int64_t kernel_h,
    const int64_t kernel_w,
    const int64_t pad_h,
    const int64_t pad_w,
    const int64_t stride_h,
    const int64_t stride_w,
    const int64_t dilation_h,
    const int64_t dilation_w,
    T* data_col) {
  const int64_t num_col_rows = channels * kernel_h * kernel_w;

  for (const auto col_row : c10::irange(num_col_rows)) {
    // Split the flat row index into (channel, kernel row, kernel column).
    const int64_t tap_x = col_row % kernel_w;
    const int64_t tap_y = (col_row / kernel_w) % kernel_h;
    const int64_t channel = col_row / kernel_h / kernel_w;

    for (const auto out_y : c10::irange(output_height)) {
      const int64_t in_y = out_y * stride_h - pad_h + tap_y * dilation_h;
      // The vertical bounds test does not depend on the column, so name it
      // once per output row.
      const bool row_in_image = in_y >= 0 && in_y < height;
      const int64_t dst_base = (col_row * output_height + out_y) * output_width;

      for (const auto out_x : c10::irange(output_width)) {
        const int64_t in_x = out_x * stride_w - pad_w + tap_x * dilation_w;
        if (row_in_image && in_x >= 0 && in_x < width) {
          data_col[dst_base + out_x] =
              data_im[(channel * height + in_y) * width + in_x];
        } else {
          // Tap lands outside the image: emit zero.
          data_col[dst_base + out_x] = static_cast<T>(0);
        }
      }
    }
  }
}
// Folds a "column" buffer back into a multi-channel image by accumulation.
//
// This walks the same (row, output position) -> image coordinate mapping as
// im2col in the opposite direction: data_im is first zero-filled, then every
// column entry whose kernel tap lands inside [0, height) x [0, width) is added
// to the image element it maps to. Entries whose tap falls outside the image
// are ignored. Image elements touched by several taps receive the sum of all
// of them.
//
// Indexing performed by this function:
//   data_col is read at       (row * output_height + oy) * output_width + ox
//   data_im  is accumulated at (c * height + y) * width + x
//
// Parameters:
//   data_col                     source column buffer (read only)
//   channels, height, width      extents used to index and zero-fill data_im
//   output_height, output_width  number of output positions per axis
//   kernel_h, kernel_w           kernel extent
//   pad_h, pad_w                 offset subtracted from the input coordinate
//   stride_h, stride_w           input step between adjacent output positions
//   dilation_h, dilation_w       input step between adjacent kernel taps
//   data_im                      destination buffer; its first
//                                height * width * channels elements are
//                                overwritten
//
// No argument is validated here. NOTE(review): callers are presumably expected
// to pass geometry consistent with how data_col was produced - confirm at the
// call sites.
template <typename T>
static void col2im(
    const T* data_col,
    const int64_t channels,
    const int64_t height,
    const int64_t width,
    const int64_t output_height,
    const int64_t output_width,
    const int64_t kernel_h,
    const int64_t kernel_w,
    const int64_t pad_h,
    const int64_t pad_w,
    const int64_t stride_h,
    const int64_t stride_w,
    const int64_t dilation_h,
    const int64_t dilation_w,
    T* data_im) {
  // The loop below only ever adds into data_im, so it has to start from zero.
  std::fill_n(data_im, height * width * channels, T(0));

  const int64_t num_col_rows = channels * kernel_h * kernel_w;

  for (const auto col_row : c10::irange(num_col_rows)) {
    // Split the flat row index into (channel, kernel row, kernel column).
    const int64_t tap_x = col_row % kernel_w;
    const int64_t tap_y = (col_row / kernel_w) % kernel_h;
    const int64_t channel = col_row / kernel_h / kernel_w;

    for (const auto out_y : c10::irange(output_height)) {
      const int64_t in_y = out_y * stride_h - pad_h + tap_y * dilation_h;
      if (in_y < 0 || in_y >= height) {
        // Every tap of this output row lands outside the image vertically.
        continue;
      }

      const int64_t src_base = (col_row * output_height + out_y) * output_width;
      const int64_t dst_base = (channel * height + in_y) * width;

      for (const auto out_x : c10::irange(output_width)) {
        const int64_t in_x = out_x * stride_w - pad_w + tap_x * dilation_w;
        if (in_x < 0 || in_x >= width) {
          continue;
        }
        data_im[dst_base + in_x] += data_col[src_base + out_x];
      }
    }
  }
}
} // native
} // at