-
Couldn't load subscription status.
- Fork 5.9k
Add im2col functor #3753
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add im2col functor #3753
Changes from all commits
6efbe2f
f7be9cb
e967645
2d707e3
abfac74
45c8f9b
f807807
3f55500
1a615b4
32d7e61
ebe8966
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,10 @@ | ||
|
|
||
| if(WITH_GPU) | ||
| nv_library(math_function SRCS math_function.cc math_function.cu DEPS cblas device_context) | ||
| nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc | ||
| im2col.cu DEPS cblas device_context) | ||
| else() | ||
| cc_library(math_function SRCS math_function.cc DEPS cblas device_context) | ||
| cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context) | ||
| endif() | ||
|
|
||
| nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) | ||
| cc_test(im2col_test SRCS im2col_test.cc DEPS math_function tensor) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,260 @@ | ||
| /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. */ | ||
|
|
||
| #include "paddle/operators/math/im2col.h" | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe it's better to rename math file to functor file. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you mean rename the |
||
|
|
||
| namespace paddle { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use namespace as follows? Whether the The namespace in math is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use the second way(paddle->operators->math). |
||
| namespace operators { | ||
| namespace math { | ||
|
|
||
| /* | ||
| * CPU im2col for the kCFO column layout. | ||
| * | ||
| * im = [input_channels, input_height, input_width] | ||
| * col = | ||
| * [input_channels, filter_height, filter_width, output_height, output_width] | ||
| * | ||
| * For every col element (c, h, w), where c is the flattened | ||
| * (channel, filter row, filter column) index, this copies the image element | ||
| * at row h * stride_height + h_offset - padding_height and column | ||
| * w * stride_width + w_offset - padding_width of channel c_im, or writes | ||
| * T(0) when that position lies outside the image (i.e. in the padding). | ||
| * | ||
| * Both tensors must have the ranks shown above (enforced below). Filter and | ||
| * output sizes are read from col.dims(); col.dims()[0] is neither read nor | ||
| * checked against input_channels. | ||
| * col is only accessed through data<T>(); presumably the caller must have | ||
| * allocated it already - TODO confirm against Tensor::data<T>(). | ||
| * `context` is not used by this specialization. | ||
| */ | ||
| template <class T> | ||
| class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, | ||
| platform::CPUPlace, T> { | ||
| public: | ||
| void operator()(const framework::Tensor& im, framework::Tensor& col, | ||
| int stride_height, int stride_width, int padding_height, | ||
| int padding_width, platform::DeviceContext* context) { | ||
| PADDLE_ENFORCE(im.dims().size() == 3); | ||
| PADDLE_ENFORCE(col.dims().size() == 5); | ||
|
|
||
| int input_channels = im.dims()[0]; | ||
| int input_height = im.dims()[1]; | ||
| int input_width = im.dims()[2]; | ||
| int filter_height = col.dims()[1]; | ||
| int filter_width = col.dims()[2]; | ||
| int output_height = col.dims()[3]; | ||
| int output_width = col.dims()[4]; | ||
| int channels_col = input_channels * filter_height * filter_width; /* count of flattened (channel, filter row, filter column) indices */ | ||
|
|
||
| const T* im_data = im.data<T>(); | ||
| T* col_data = col.data<T>(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does the functor interface require lazy allocation? Such as gemm will be wrapped into a functor, does the output of this functor need to lazy allocation? |
||
|
|
||
| for (int c = 0; c < channels_col; ++c) { | ||
| int w_offset = c % filter_width; /* filter column */ | ||
| int h_offset = (c / filter_width) % filter_height; /* filter row */ | ||
| int c_im = c / filter_width / filter_height; /* image channel */ | ||
| for (int h = 0; h < output_height; ++h) { | ||
| for (int w = 0; w < output_width; ++w) { | ||
| int im_row_idx = h * stride_height + h_offset; | ||
| int im_col_idx = w * stride_width + w_offset; | ||
| if ((im_row_idx - padding_height) < 0 || | ||
| (im_row_idx - padding_height) >= input_height || | ||
| (im_col_idx - padding_width) < 0 || | ||
| (im_col_idx - padding_width) >= input_width) { /* source position lies in the padding */ | ||
| col_data[(c * output_height + h) * output_width + w] = T(0); | ||
| } else { | ||
| im_row_idx += c_im * input_height - padding_height; /* row in the image viewed as [channels * height, width] */ | ||
| im_col_idx -= padding_width; | ||
| col_data[(c * output_height + h) * output_width + w] = | ||
| im_data[im_row_idx * input_width + im_col_idx]; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| /* | ||
| * CPU col2im for the kCFO column layout. | ||
| * | ||
| * im = [input_channels, input_height, input_width] | ||
| * col = | ||
| * [input_channels, filter_height, filter_width, output_height, output_width] | ||
| * | ||
| * Walks col in the same order as the kCFO im2col and adds each col element | ||
| * whose source position lies inside the image onto the matching im element; | ||
| * col elements that map into the padding are skipped. | ||
| * | ||
| * im is only accumulated into (+=) and is never cleared here, so whatever it | ||
| * held before the call contributes to the result. | ||
| * Both tensors must have the ranks shown above (enforced below). Filter and | ||
| * output sizes are read from col.dims(); col.dims()[0] is neither read nor | ||
| * checked against input_channels. | ||
| * `context` is not used by this specialization. | ||
| */ | ||
| template <class T> | ||
| class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO, | ||
| platform::CPUPlace, T> { | ||
| public: | ||
| void operator()(framework::Tensor& im, const framework::Tensor& col, | ||
| int stride_height, int stride_width, int padding_height, | ||
| int padding_width, platform::DeviceContext* context) { | ||
| PADDLE_ENFORCE(im.dims().size() == 3); | ||
| PADDLE_ENFORCE(col.dims().size() == 5); | ||
| int input_channels = im.dims()[0]; | ||
| int input_height = im.dims()[1]; | ||
| int input_width = im.dims()[2]; | ||
| int filter_height = col.dims()[1]; | ||
| int filter_width = col.dims()[2]; | ||
| int output_height = col.dims()[3]; | ||
| int output_width = col.dims()[4]; | ||
| int channels_col = input_channels * filter_height * filter_width; /* count of flattened (channel, filter row, filter column) indices */ | ||
|
|
||
| T* im_data = im.data<T>(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. due to the lazy allocation, I think it's better to use |
||
| const T* col_data = col.data<T>(); | ||
|
|
||
| for (int c = 0; c < channels_col; ++c) { | ||
| int w_offset = c % filter_width; /* filter column */ | ||
| int h_offset = (c / filter_width) % filter_height; /* filter row */ | ||
| int c_im = c / filter_width / filter_height; /* image channel */ | ||
| for (int h = 0; h < output_height; ++h) { | ||
| for (int w = 0; w < output_width; ++w) { | ||
| int im_row_idx = h * stride_height + h_offset; | ||
| int im_col_idx = w * stride_width + w_offset; | ||
| if ((im_row_idx - padding_height) >= 0 && | ||
| (im_row_idx - padding_height) < input_height && | ||
| (im_col_idx - padding_width) >= 0 && | ||
| (im_col_idx - padding_width) < input_width) { /* source position lies inside the image */ | ||
| im_row_idx += c_im * input_height - padding_height; /* row in the image viewed as [channels * height, width] */ | ||
| im_col_idx -= padding_width; | ||
| im_data[im_row_idx * input_width + im_col_idx] += /* accumulates into im instead of overwriting */ | ||
| col_data[(c * output_height + h) * output_width + w]; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| template class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, /* explicit instantiations of the kCFO CPU functors for float and double */ | ||
| platform::CPUPlace, float>; | ||
| template class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO, | ||
| platform::CPUPlace, double>; | ||
| template class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO, | ||
| platform::CPUPlace, float>; | ||
| template class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO, | ||
| platform::CPUPlace, double>; | ||
|
|
||
| /* | ||
| * CPU im2col for the kOCF column layout. | ||
| * | ||
| * im = [input_channels, input_height, input_width] | ||
| * col = | ||
| * [output_height, output_width, input_channels, filter_height, filter_width] | ||
| * | ||
| * For every output position (col_row_idx, col_col_idx), channel and filter | ||
| * tap, this copies the image element at row | ||
| * col_row_idx * stride_height + filter_row_idx - padding_height and column | ||
| * col_col_idx * stride_width + filter_col_idx - padding_width, or writes | ||
| * T(0) when that position lies outside the image (i.e. in the padding). | ||
| * | ||
| * Both tensors must have the ranks shown above (enforced below). Output and | ||
| * filter sizes are read from col.dims(); col.dims()[2] is neither read nor | ||
| * checked against input_channels. | ||
| * col is only accessed through data<T>(); presumably the caller must have | ||
| * allocated it already - TODO confirm against Tensor::data<T>(). | ||
| * `context` is not used by this specialization. | ||
| */ | ||
| template <class T> | ||
| class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF, | ||
| platform::CPUPlace, T> { | ||
| public: | ||
| void operator()(const framework::Tensor& im, framework::Tensor& col, | ||
| int stride_height, int stride_width, int padding_height, | ||
| int padding_width, platform::DeviceContext* context) { | ||
| PADDLE_ENFORCE(im.dims().size() == 3); | ||
| PADDLE_ENFORCE(col.dims().size() == 5); | ||
| int input_channels = im.dims()[0]; | ||
| int input_height = im.dims()[1]; | ||
| int input_width = im.dims()[2]; | ||
| int filter_height = col.dims()[3]; | ||
| int filter_width = col.dims()[4]; | ||
| int output_height = col.dims()[0]; | ||
| int output_width = col.dims()[1]; | ||
|
|
||
| const T* im_data = im.data<T>(); | ||
| T* col_data = col.data<T>(); | ||
|
|
||
| for (int col_row_idx = 0; col_row_idx < output_height; ++col_row_idx) { | ||
| for (int col_col_idx = 0; col_col_idx < output_width; ++col_col_idx) { | ||
| for (int channel = 0; channel < input_channels; ++channel) { | ||
| for (int filter_row_idx = 0; filter_row_idx < filter_height; | ||
| ++filter_row_idx) { | ||
| for (int filter_col_idx = 0; filter_col_idx < filter_width; | ||
| ++filter_col_idx) { | ||
| int im_row_offset = | ||
| col_row_idx * stride_height + filter_row_idx - padding_height; | ||
| int im_col_offset = | ||
| col_col_idx * stride_width + filter_col_idx - padding_width; | ||
| int col_offset = (((col_row_idx * output_width + col_col_idx) * | ||
| input_channels + | ||
| channel) * | ||
| filter_height + | ||
| filter_row_idx) * | ||
| filter_width + | ||
| filter_col_idx; /* row-major offset into [output_height, output_width, input_channels, filter_height, filter_width] */ | ||
| if (im_row_offset < 0 || im_row_offset >= input_height || | ||
| im_col_offset < 0 || im_col_offset >= input_width) { /* source position lies in the padding */ | ||
| col_data[col_offset] = T(0); | ||
| } else { | ||
| int im_offset = | ||
| (channel * input_height + im_row_offset) * input_width + | ||
| im_col_offset; /* row-major offset into [input_channels, input_height, input_width] */ | ||
| col_data[col_offset] = im_data[im_offset]; | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| /* | ||
| * CPU col2im for the kOCF column layout. | ||
| * | ||
| * im = [input_channels, input_height, input_width] | ||
| * col = | ||
| * [output_height, output_width, input_channels, filter_height, filter_width] | ||
| * | ||
| * Walks col in the same order as the kOCF im2col and adds each col element | ||
| * whose source position lies inside the image onto the matching im element; | ||
| * col elements that map into the padding are skipped. | ||
| * | ||
| * im is only accumulated into (+=) and is never cleared here, so whatever it | ||
| * held before the call contributes to the result. | ||
| * Both tensors must have the ranks shown above (enforced below). Output and | ||
| * filter sizes are read from col.dims(); col.dims()[2] is neither read nor | ||
| * checked against input_channels. | ||
| * `context` is not used by this specialization. | ||
| */ | ||
| template <class T> | ||
| class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF, | ||
| platform::CPUPlace, T> { | ||
| public: | ||
| void operator()(framework::Tensor& im, const framework::Tensor& col, | ||
| int stride_height, int stride_width, int padding_height, | ||
| int padding_width, platform::DeviceContext* context) { | ||
| PADDLE_ENFORCE(im.dims().size() == 3); | ||
| PADDLE_ENFORCE(col.dims().size() == 5); | ||
| int input_channels = im.dims()[0]; | ||
| int input_height = im.dims()[1]; | ||
| int input_width = im.dims()[2]; | ||
| int filter_height = col.dims()[3]; | ||
| int filter_width = col.dims()[4]; | ||
| int output_height = col.dims()[0]; | ||
| int output_width = col.dims()[1]; | ||
|
|
||
| T* im_data = im.data<T>(); | ||
| const T* col_data = col.data<T>(); | ||
|
|
||
| for (int col_row_idx = 0; col_row_idx < output_height; ++col_row_idx) { | ||
| for (int col_col_idx = 0; col_col_idx < output_width; ++col_col_idx) { | ||
| for (int channel = 0; channel < input_channels; ++channel) { | ||
| for (int filter_row_idx = 0; filter_row_idx < filter_height; | ||
| ++filter_row_idx) { | ||
| for (int filter_col_idx = 0; filter_col_idx < filter_width; | ||
| ++filter_col_idx) { | ||
| int im_row_offset = | ||
| col_row_idx * stride_height + filter_row_idx - padding_height; | ||
| int im_col_offset = | ||
| col_col_idx * stride_width + filter_col_idx - padding_width; | ||
| int col_offset = (((col_row_idx * output_width + col_col_idx) * | ||
| input_channels + | ||
| channel) * | ||
| filter_height + | ||
| filter_row_idx) * | ||
| filter_width + | ||
| filter_col_idx; /* row-major offset into [output_height, output_width, input_channels, filter_height, filter_width] */ | ||
| if (im_row_offset >= 0 && im_row_offset < input_height && | ||
| im_col_offset >= 0 && im_col_offset < input_width) { /* source position lies inside the image */ | ||
| int im_offset = | ||
| (channel * input_height + im_row_offset) * input_width + | ||
| im_col_offset; /* row-major offset into [input_channels, input_height, input_width] */ | ||
| im_data[im_offset] += col_data[col_offset]; /* accumulates into im instead of overwriting */ | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| }; | ||
|
|
||
| template class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF, /* explicit instantiations of the kOCF CPU functors for float and double */ | ||
| platform::CPUPlace, float>; | ||
| template class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF, | ||
| platform::CPUPlace, double>; | ||
| template class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF, | ||
| platform::CPUPlace, float>; | ||
| template class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF, | ||
| platform::CPUPlace, double>; | ||
|
|
||
| } // namespace math | ||
| } // namespace operators | ||
| } // namespace paddle | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should `nv_library` and `cc_library` be separated into different libraries for the different files? If they are separated, each operator only has to depend on the specific library it needs when compiling.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean adding a `libmath_function_cu.a` only for CUDA operators?