Skip to content

MKLDNN pool2d OP kernel added #8879

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
289 changes: 126 additions & 163 deletions paddle/fluid/operators/conv_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,58 +12,21 @@
See the License for the specific language governing permissions and
limitations under the License. */

#include "mkldnn.hpp"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/conv_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"

namespace paddle {
namespace operators {

using paddle::framework::Tensor;
using paddle::platform::MKLDNNDeviceContext;
using paddle::platform::MKLDNNMemDesc;

using mkldnn::memory; // Note: paddle has also "memory" namespace
using mkldnn::primitive;
using mkldnn::convolution_forward;
using mkldnn::convolution_backward_weights;
using mkldnn::convolution_backward_data;
using mkldnn::convolution_direct;
using mkldnn::prop_kind;
using mkldnn::padding_kind;
using mkldnn::stream;

namespace {
std::unique_ptr<mkldnn::convolution_forward::primitive_desc>
ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights,
const memory::desc& dst, const std::vector<int>& strides,
const std::vector<int>& paddings,
const mkldnn::engine& engine);

convolution_backward_weights::primitive_desc ConvBwdWeightsPrimitiveDesc(
const memory::desc& src, const memory::desc& diff_weights,
const memory::desc& diff_dst, const std::vector<int>& strides,
const std::vector<int>& paddings,
const convolution_forward::primitive_desc& conv_pd,
const mkldnn::engine& engine);

convolution_backward_data::primitive_desc ConvBwdDataPrimitiveDesc(
const memory::desc& diff_src, const memory::desc& weights,
const memory::desc& diff_dst, const std::vector<int>& strides,
const std::vector<int>& paddings,
const convolution_forward::primitive_desc& conv_pd,
const mkldnn::engine& engine);
} // anonymous namespace

template <typename T>
class ConvOpMkldnnKernel : public paddle::framework::OpKernel<T> {
class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");

auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
auto& dev_ctx =
ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();

auto* input = ctx.Input<Tensor>("Input");
Expand All @@ -88,7 +51,6 @@ class ConvOpMkldnnKernel : public paddle::framework::OpKernel<T> {

const T* input_data = input->data<T>();
const T* filter_data = filter->data<T>();
// allocate memory for output
T* output_data = output->mutable_data<T>(ctx.GetPlace());

PADDLE_ENFORCE(input->dims().size() == 4,
Expand All @@ -102,48 +64,69 @@ class ConvOpMkldnnKernel : public paddle::framework::OpKernel<T> {
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());

// TODO(pzelazko-intel): support more formats
// memory descriptors for convolution src/weight/dst
auto conv_src_md =
MKLDNNMemDesc(src_tz, memory::data_type::f32, memory::format::nchw);
auto conv_weights_md =
MKLDNNMemDesc(weights_tz, memory::data_type::f32, memory::format::oihw);
auto conv_dst_md =
MKLDNNMemDesc(dst_tz, memory::data_type::f32, memory::format::nchw);

// create memory primitives
auto conv_src_memory =
memory({conv_src_md, mkldnn_engine}, (void*)input_data);
auto conv_weights_memory =
memory({conv_weights_md, mkldnn_engine}, (void*)filter_data);
auto conv_dst_memory = memory({conv_dst_md, mkldnn_engine}, output_data);

std::unique_ptr<convolution_forward::primitive_desc> conv_pd =
ConvFwdPrimitiveDesc(conv_src_md, conv_weights_md, conv_dst_md, strides,
paddings, mkldnn_engine);

// save p_conv_pd into dev_ctx to be referred in backward path
auto p_conv_pd = conv_pd.get();
std::shared_ptr<void> conv_pd_value = std::move(conv_pd);
dev_ctx.SetBlob(key_conv_pd, conv_pd_value);
auto src_md = platform::MKLDNNMemDesc(
src_tz, mkldnn::memory::data_type::f32, mkldnn::memory::format::nchw);
auto weights_md =
platform::MKLDNNMemDesc(weights_tz, mkldnn::memory::data_type::f32,
mkldnn::memory::format::oihw);
auto dst_md = platform::MKLDNNMemDesc(
dst_tz, mkldnn::memory::data_type::f32, mkldnn::memory::format::nchw);

auto src_memory =
mkldnn::memory({src_md, mkldnn_engine}, (void*)input_data);
auto weights_memory =
mkldnn::memory({weights_md, mkldnn_engine}, (void*)filter_data);
auto dst_memory = mkldnn::memory({dst_md, mkldnn_engine}, output_data);

std::shared_ptr<mkldnn::convolution_forward::primitive_desc> conv_pd =
ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides, paddings,
mkldnn_engine);

// save conv_pd into global device context to be referred in backward path
dev_ctx.SetBlob(key_conv_pd, conv_pd);

// create convolution op primitive
auto conv_prim = convolution_forward(*p_conv_pd, conv_src_memory,
conv_weights_memory, conv_dst_memory);
auto conv_prim = mkldnn::convolution_forward(*conv_pd, src_memory,
weights_memory, dst_memory);

// push primitive to stream and wait until it's executed
std::vector<mkldnn::primitive> pipeline{conv_prim};
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
}

// push op to stream and wait MKLDNN until it's executed
std::vector<primitive> pipeline{conv_prim};
stream(stream::kind::eager).submit(pipeline).wait();
private:
std::unique_ptr<mkldnn::convolution_forward::primitive_desc>
ConvFwdPrimitiveDesc(const mkldnn::memory::desc& src,
const mkldnn::memory::desc& weights,
const mkldnn::memory::desc& dst,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const mkldnn::engine& engine) const {
mkldnn::memory::dims stride_dims = {strides[0], strides[1]};
mkldnn::memory::dims padding_dims = {paddings[0], paddings[1]};

auto conv_desc = mkldnn::convolution_forward::desc(
mkldnn::prop_kind::forward, mkldnn::convolution_direct, src, weights,
dst, stride_dims, padding_dims, padding_dims,
mkldnn::padding_kind::zero);

auto p_conv_pd =
new mkldnn::convolution_forward::primitive_desc(conv_desc, engine);

return std::unique_ptr<mkldnn::convolution_forward::primitive_desc>(
p_conv_pd);
}
};

template <typename T>
class ConvGradOpMkldnnKernel : public paddle::framework::OpKernel<T> {
class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");

auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
auto& dev_ctx =
ctx.template device_context<platform::MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();

const Tensor* input = ctx.Input<Tensor>("Input");
Expand All @@ -170,7 +153,6 @@ class ConvGradOpMkldnnKernel : public paddle::framework::OpKernel<T> {
T* input_grad_data = nullptr;
T* filter_grad_data = nullptr;

// allocate memory for gradient of input/filter
if (input_grad) {
input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
}
Expand All @@ -184,130 +166,111 @@ class ConvGradOpMkldnnKernel : public paddle::framework::OpKernel<T> {
std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());

// TODO(pzelazko-intel): support more formats
auto conv_src_md =
MKLDNNMemDesc(src_tz, memory::data_type::f32, memory::format::nchw);
auto conv_diff_src_md =
MKLDNNMemDesc(src_tz, memory::data_type::f32, memory::format::nchw);
auto conv_weights_md =
MKLDNNMemDesc(weights_tz, memory::data_type::f32, memory::format::oihw);
auto conv_diff_weights_md =
MKLDNNMemDesc(weights_tz, memory::data_type::f32, memory::format::oihw);
auto conv_diff_dst_md =
MKLDNNMemDesc(dst_tz, memory::data_type::f32, memory::format::nchw);
auto src_md = platform::MKLDNNMemDesc(
src_tz, mkldnn::memory::data_type::f32, mkldnn::memory::format::nchw);
auto diff_src_md = platform::MKLDNNMemDesc(
src_tz, mkldnn::memory::data_type::f32, mkldnn::memory::format::nchw);
auto weights_md =
platform::MKLDNNMemDesc(weights_tz, mkldnn::memory::data_type::f32,
mkldnn::memory::format::oihw);
auto diff_weights_md =
platform::MKLDNNMemDesc(weights_tz, mkldnn::memory::data_type::f32,
mkldnn::memory::format::oihw);
auto diff_dst_md = platform::MKLDNNMemDesc(
dst_tz, mkldnn::memory::data_type::f32, mkldnn::memory::format::nchw);

// create memory
auto conv_diff_dst_memory =
memory({conv_diff_weights_md, mkldnn_engine}, (void*)output_grad_data);
auto diff_dst_memory = mkldnn::memory({diff_weights_md, mkldnn_engine},
(void*)output_grad_data);
// Retrieve conv_pd from device context
std::shared_ptr<void> conv_pd;
convolution_forward::primitive_desc* p_conv_pd;

conv_pd = dev_ctx.GetBlob(key_conv_pd);
auto conv_pd =
std::static_pointer_cast<mkldnn::convolution_forward::primitive_desc>(
dev_ctx.GetBlob(key_conv_pd));
PADDLE_ENFORCE(conv_pd != nullptr,
"Fail to find conv_pd in device context");
p_conv_pd =
static_cast<convolution_forward::primitive_desc*>(conv_pd.get());

// create backward conv primitive for weights
if (filter_grad) {
// create primitive descriptor
convolution_backward_weights::primitive_desc conv_bwd_weights_pd =
ConvBwdWeightsPrimitiveDesc(conv_src_md, conv_diff_weights_md,
conv_diff_dst_md, strides, paddings,
*p_conv_pd, mkldnn_engine);
mkldnn::convolution_backward_weights::primitive_desc conv_bwd_weights_pd =
ConvBwdWeightsPrimitiveDesc(src_md, diff_weights_md, diff_dst_md,
strides, paddings, *conv_pd,
mkldnn_engine);

// create memory
auto conv_diff_weights_memory = memory(
{conv_diff_weights_md, mkldnn_engine}, (void*)filter_grad_data);
auto conv_src_memory =
memory({conv_src_md, mkldnn_engine}, (void*)input_data);
auto diff_weights_memory = mkldnn::memory(
{diff_weights_md, mkldnn_engine}, (void*)filter_grad_data);
auto src_memory =
mkldnn::memory({src_md, mkldnn_engine}, (void*)input_data);

// create backward conv primitive for weights
auto conv_bwd_weights_prim = convolution_backward_weights(
conv_bwd_weights_pd, conv_src_memory, conv_diff_dst_memory,
conv_diff_weights_memory);
auto conv_bwd_weights_prim = mkldnn::convolution_backward_weights(
conv_bwd_weights_pd, src_memory, diff_dst_memory,
diff_weights_memory);

// push primitive and execute it
std::vector<primitive> pipeline{conv_bwd_weights_prim};
stream(stream::kind::eager).submit(pipeline).wait();
std::vector<mkldnn::primitive> pipeline{conv_bwd_weights_prim};
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
}

if (input_grad) {
// create primitive descriptor
convolution_backward_data::primitive_desc conv_bwd_data_pd =
ConvBwdDataPrimitiveDesc(conv_diff_src_md, conv_weights_md,
conv_diff_dst_md, strides, paddings,
*p_conv_pd, mkldnn_engine);
mkldnn::convolution_backward_data::primitive_desc conv_bwd_data_pd =
ConvBwdDataPrimitiveDesc(diff_src_md, weights_md, diff_dst_md,
strides, paddings, *conv_pd, mkldnn_engine);

// create memory
auto conv_diff_src_memory =
memory({conv_diff_src_md, mkldnn_engine}, (void*)input_grad_data);
auto conv_weights_memory =
memory({conv_weights_md, mkldnn_engine}, (void*)filter_data);
auto diff_src_memory =
mkldnn::memory({diff_src_md, mkldnn_engine}, (void*)input_grad_data);
auto weights_memory =
mkldnn::memory({weights_md, mkldnn_engine}, (void*)filter_data);

// create backward conv primitive for data
auto conv_bwd_data_prim =
convolution_backward_data(conv_bwd_data_pd, conv_diff_dst_memory,
conv_weights_memory, conv_diff_src_memory);
auto conv_bwd_data_prim = mkldnn::convolution_backward_data(
conv_bwd_data_pd, diff_dst_memory, weights_memory, diff_src_memory);

// push primitive and execute it
std::vector<primitive> pipeline{conv_bwd_data_prim};
stream(stream::kind::eager).submit(pipeline).wait();
// push primitive to stream and wait until it's executed
std::vector<mkldnn::primitive> pipeline{conv_bwd_data_prim};
mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait();
}
} // Compute()

private:
mkldnn::convolution_backward_weights::primitive_desc
ConvBwdWeightsPrimitiveDesc(
const mkldnn::memory::desc& src, const mkldnn::memory::desc& diff_weights,
const mkldnn::memory::desc& diff_dst, const std::vector<int>& strides,
const std::vector<int>& paddings,
const mkldnn::convolution_forward::primitive_desc& conv_pd,
const mkldnn::engine& engine) const {
auto conv_bwd_weights_desc = mkldnn::convolution_backward_weights::desc(
mkldnn::convolution_direct, src, diff_weights, diff_dst, strides,
paddings, paddings, mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_weights::primitive_desc(
conv_bwd_weights_desc, engine, conv_pd);
}

mkldnn::convolution_backward_data::primitive_desc ConvBwdDataPrimitiveDesc(
const mkldnn::memory::desc& diff_src, const mkldnn::memory::desc& weights,
const mkldnn::memory::desc& diff_dst, const std::vector<int>& strides,
const std::vector<int>& paddings,
const mkldnn::convolution_forward::primitive_desc& conv_pd,
const mkldnn::engine& engine) const {
auto conv_bwd_data_desc = mkldnn::convolution_backward_data::desc(
mkldnn::convolution_direct, diff_src, weights, diff_dst, strides,
paddings, paddings, mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_data::primitive_desc(conv_bwd_data_desc,
engine, conv_pd);
}
};

namespace {
std::unique_ptr<convolution_forward::primitive_desc> ConvFwdPrimitiveDesc(
const memory::desc& src, const memory::desc& weights,
const memory::desc& dst, const std::vector<int>& strides,
const std::vector<int>& paddings, const mkldnn::engine& engine) {
mkldnn::memory::dims stride_dims = {strides[0], strides[1]};
mkldnn::memory::dims padding_dims = {paddings[0], paddings[1]};

auto conv_desc = mkldnn::convolution_forward::desc(
mkldnn::prop_kind::forward, mkldnn::convolution_direct, src, weights, dst,
stride_dims, padding_dims, padding_dims, mkldnn::padding_kind::zero);

auto p_conv_pd = new convolution_forward::primitive_desc(conv_desc, engine);

return std::unique_ptr<mkldnn::convolution_forward::primitive_desc>(
p_conv_pd);
}

convolution_backward_weights::primitive_desc ConvBwdWeightsPrimitiveDesc(
const memory::desc& src, const memory::desc& diff_weights,
const memory::desc& diff_dst, const std::vector<int>& strides,
const std::vector<int>& paddings,
const convolution_forward::primitive_desc& conv_pd,
const mkldnn::engine& engine) {
auto conv_bwd_weights_desc = convolution_backward_weights::desc(
convolution_direct, src, diff_weights, diff_dst, strides, paddings,
paddings, padding_kind::zero);
return convolution_backward_weights::primitive_desc(conv_bwd_weights_desc,
engine, conv_pd);
}

convolution_backward_data::primitive_desc ConvBwdDataPrimitiveDesc(
const memory::desc& diff_src, const memory::desc& weights,
const memory::desc& diff_dst, const std::vector<int>& strides,
const std::vector<int>& paddings,
const convolution_forward::primitive_desc& conv_pd,
const mkldnn::engine& engine) {
auto conv_bwd_data_desc = convolution_backward_data::desc(
convolution_direct, diff_src, weights, diff_dst, strides, paddings,
paddings, padding_kind::zero);
return convolution_backward_data::primitive_desc(conv_bwd_data_desc, engine,
conv_pd);
}
} // anonymous namespace
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_KERNEL(conv2d, MKLDNN, ::paddle::platform::CPUPlace,
ops::ConvOpMkldnnKernel<float>);
ops::ConvMKLDNNOpKernel<float>);

REGISTER_OP_KERNEL(conv2d_grad, MKLDNN, ::paddle::platform::CPUPlace,
ops::ConvGradOpMkldnnKernel<float>);
ops::ConvMKLDNNGradOpKernel<float>);
Loading