[MXNET-491] Use depthwise convolution by cuDNNv7 if available, updated version (apache#11076)

* Use group convolution by cuDNNv7 if available

* Fix coding style

* indent-- for #if statements

* more indent--

* more indent--

* prefer cudnnv7 depthwise convolution
nihui authored and piiswrong committed May 29, 2018
1 parent 4ac76c8 commit 805a71a
Showing 2 changed files with 100 additions and 2 deletions.
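
Background for the diff below: cuDNN 7 introduced native grouped convolution via cudnnSetConvolutionGroupCount(), and a depthwise convolution is simply a grouped convolution with one group per input channel (num_filter == num_group == input channels). The commit therefore hands depthwise convolutions to cuDNN when CUDNN_MAJOR >= 7, instead of MXNet's hand-written DepthwiseConvolutionOp, and collapses the old per-group loop of cuDNN calls into a single call. A minimal illustration only (a hypothetical standalone helper, not code from this commit; assumes cuDNN 6 or newer, and CHECK_CUDNN is our own macro):

#include <cudnn.h>
#include <cstdio>
#include <cstdlib>

#define CHECK_CUDNN(expr)                                       \
  do {                                                          \
    cudnnStatus_t st = (expr);                                  \
    if (st != CUDNN_STATUS_SUCCESS) {                           \
      std::fprintf(stderr, "cuDNN error: %s\n",                 \
                   cudnnGetErrorString(st));                    \
      std::exit(1);                                             \
    }                                                           \
  } while (0)

// Sketch: configure a depthwise (grouped) 3x3 convolution descriptor.
// One group per input channel makes the convolution depthwise.
cudnnConvolutionDescriptor_t MakeDepthwiseConvDesc(int channels) {
  cudnnConvolutionDescriptor_t conv_desc;
  CHECK_CUDNN(cudnnCreateConvolutionDescriptor(&conv_desc));
  CHECK_CUDNN(cudnnSetConvolution2dDescriptor(
      conv_desc,
      /*pad_h=*/1, /*pad_w=*/1, /*stride_h=*/1, /*stride_w=*/1,
      /*dilation_h=*/1, /*dilation_w=*/1,
      CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT));
#if CUDNN_MAJOR >= 7
  CHECK_CUDNN(cudnnSetConvolutionGroupCount(conv_desc, channels));
#endif
  return conv_desc;
}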
10 changes: 8 additions & 2 deletions src/operator/nn/convolution.cu
@@ -97,7 +97,9 @@ void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs,
       op.Forward(ctx, inputs, req, outputs);
     })
     return;
-  } else if (param.num_filter == param.num_group &&
+  }
+#if MXNET_USE_CUDNN == 0 || CUDNN_MAJOR < 7
+  if (param.num_filter == param.num_group &&
       param.layout.value() == mshadow::kNCHW &&
       param.num_filter == inputs[conv::kData].shape_[1] &&
       param.kernel.ndim() == 2 &&
@@ -112,6 +114,7 @@ void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs,
     op.Forward(ctx, inputs, req, outputs);
     return;
   }
+#endif
 
 #if MXNET_USE_CUDNN == 1
   // On fp16-I/O instances, use fp32 compute (i.e. pseudo-fp16).
@@ -167,7 +170,9 @@ void ConvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
       op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad);
     })
     return;
-  } else if (param.num_filter == param.num_group &&
+  }
+#if MXNET_USE_CUDNN == 0 || CUDNN_MAJOR < 7
+  if (param.num_filter == param.num_group &&
       param.layout.value() == mshadow::kNCHW &&
       param.num_filter == in_data[conv::kData].shape_[1] &&
       param.kernel.ndim() == 2 &&
@@ -183,6 +188,7 @@ void ConvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
     op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad);
     return;
   }
+#endif
 
 #if MXNET_USE_CUDNN == 1
   // On fp16-I/O instances, use fp32 compute (i.e. pseudo-fp16).
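
The two hunks above are the dispatch side of the change: the branch that selected MXNet's hand-written depthwise kernel is now compiled only when cuDNN is absent or older than version 7, so on cuDNN >= 7 depthwise shapes fall through to CuDNNConvolutionOp. A stripped-down model of that compile-time dispatch, with hypothetical names and the macros pinned so it runs without cuDNN:

#include <cstdio>

#define MXNET_USE_CUDNN 1  // pinned for illustration
#define CUDNN_MAJOR 7      // pinned for illustration

// Depthwise: one filter per group and one group per input channel.
static bool IsDepthwise(int num_filter, int num_group, int in_channels) {
  return num_filter == num_group && num_group == in_channels;
}

static void Dispatch(int num_filter, int num_group, int in_channels) {
  const bool depthwise = IsDepthwise(num_filter, num_group, in_channels);
#if MXNET_USE_CUDNN == 0 || CUDNN_MAJOR < 7
  if (depthwise) {
    std::puts("hand-written DepthwiseConvolutionOp");
    return;
  }
#endif
  // On cuDNN >= 7 this is reached even for depthwise shapes.
  std::printf("CuDNNConvolutionOp (%s)\n",
              depthwise ? "native grouped convolution" : "regular path");
}

int main() {
  Dispatch(32, 32, 32);  // depthwise shape -> cuDNN path on cuDNN >= 7
}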
92 changes: 92 additions & 0 deletions src/operator/nn/cudnn/cudnn_convolution-inl.h
@@ -137,6 +137,35 @@ class CuDNNConvolutionOp {
     DType *wmat_ptr = GetNdPtr(in_data[conv::kWeight], param_.kernel.ndim() + 2, s);
     DType *out_ptr = GetNdPtr(out_data[conv::kOut], param_.kernel.ndim() + 2, s);
 
+#if CUDNN_MAJOR >= 7
+    typename DataType<DType>::ScaleType alpha = 1.0f;
+    typename DataType<DType>::ScaleType beta = 0.0f;
+    typename DataType<DType>::ScaleType beta_add = 1.0f;
+    CUDNN_CALL(cudnnConvolutionForward(s->dnn_handle_,
+                                       &alpha,
+                                       in_desc_,
+                                       data_ptr,
+                                       filter_desc_,
+                                       wmat_ptr,
+                                       forward_conv_desc_,
+                                       forward_algo_.AlgoNumber(),
+                                       workspace.dptr_,
+                                       workspace_size,
+                                       req[conv::kOut] == kAddTo? &beta_add : &beta,
+                                       out_desc_,
+                                       out_ptr));
+
+    if (!param_.no_bias) {
+      Tensor<gpu, 1, DType> bias = in_data[conv::kBias].get<gpu, 1, DType>(s);
+      CUDNN_CALL(cudnnAddTensor(s->dnn_handle_,
+                                &alpha,
+                                bias_desc_,
+                                bias.dptr_,
+                                &beta_add,
+                                out_desc_,
+                                out_ptr));
+    }
+#else
     for (uint32_t g = 0; g < param_.num_group; ++g) {
       typename DataType<DType>::ScaleType alpha = 1.0f;
       typename DataType<DType>::ScaleType beta = 0.0f;
@@ -177,6 +206,7 @@ class CuDNNConvolutionOp {
 #endif
       }
     }
+#endif // CUDNN_MAJOR >= 7
   }
 
   void Backward(const OpContext &ctx,
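
In the Forward hunk above, the cuDNN >= 7 branch issues a single cudnnConvolutionForward over all groups; a kAddTo write request is expressed through cuDNN's blend factors, since the call computes y = alpha * conv(x, w) + beta * y. A sketch of that call shape as a hypothetical free function over already-built descriptors (not the commit's code, which lives in CuDNNConvolutionOp::Forward; error checking omitted):

#include <cstddef>
#include <cudnn.h>

void ForwardOnce(cudnnHandle_t handle,
                 cudnnTensorDescriptor_t in_desc, const float* x,
                 cudnnFilterDescriptor_t filter_desc, const float* w,
                 cudnnConvolutionDescriptor_t conv_desc,
                 cudnnConvolutionFwdAlgo_t algo,
                 void* workspace, std::size_t workspace_bytes,
                 bool add_to,  // corresponds to req[conv::kOut] == kAddTo
                 cudnnTensorDescriptor_t out_desc, float* y) {
  const float alpha = 1.0f;
  const float beta = add_to ? 1.0f : 0.0f;  // 1.0f accumulates into y
  cudnnConvolutionForward(handle, &alpha,
                          in_desc, x,
                          filter_desc, w,
                          conv_desc, algo,
                          workspace, workspace_bytes,
                          &beta, out_desc, y);
}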
@@ -202,6 +232,51 @@ class CuDNNConvolutionOp {
     GetTempSize(ctx);
     Tensor<gpu, 1, DType> workspace = AllocateTempWorkspace(ctx, backward_workspace_byte_);
     size_t workspace_size = TensorSizeBytes(workspace);
+#if CUDNN_MAJOR >= 7
+    typename DataType<DType>::ScaleType alpha = 1.0f;
+    typename DataType<DType>::ScaleType beta = 0.0f;
+    typename DataType<DType>::ScaleType beta_add = 1.0f;
+    if (!param_.no_bias && (req[conv::kBias] != kNullOp)) {
+      Tensor<gpu, 1, DType> gbias = in_grad[conv::kBias].get<gpu, 1, DType>(s);
+      CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
+                                              &alpha,
+                                              out_desc_,
+                                              grad_ptr,
+                                              req[conv::kBias] == kAddTo ? &beta_add : &beta,
+                                              bias_desc_,
+                                              gbias.dptr_));
+    }
+    if (req[conv::kWeight] != kNullOp) {
+      CUDNN_CALL(cudnnConvolutionBackwardFilter(s->dnn_handle_,
+                                                &alpha,
+                                                in_desc_,
+                                                data_ptr,
+                                                out_desc_,
+                                                grad_ptr,
+                                                back_conv_desc_w_,
+                                                back_algo_w_.AlgoNumber(),
+                                                workspace.dptr_,
+                                                workspace_size,
+                                                req[conv::kWeight] == kAddTo? &beta_add : &beta,
+                                                filter_desc_,
+                                                gwmat_ptr));
+    }
+    if (req[conv::kData] != kNullOp) {
+      CUDNN_CALL(cudnnConvolutionBackwardData(s->dnn_handle_,
+                                              &alpha,
+                                              filter_desc_,
+                                              wmat_ptr,
+                                              out_desc_,
+                                              grad_ptr,
+                                              back_conv_desc_,
+                                              back_algo_.AlgoNumber(),
+                                              workspace.dptr_,
+                                              workspace_size,
+                                              req[conv::kData] == kAddTo? &beta_add : &beta,
+                                              in_desc_,
+                                              gdata_ptr));
+    }
+#else
     for (uint32_t g = 0; g < param_.num_group; ++g) {
       typename DataType<DType>::ScaleType alpha = 1.0f;
       typename DataType<DType>::ScaleType beta = 0.0f;
@@ -279,6 +354,7 @@ class CuDNNConvolutionOp {
 #endif
       }
     }
+#endif // CUDNN_MAJOR >= 7
   }
 
   /*!
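
The Backward hunk follows the same pattern: at most three cuDNN calls, one per requested gradient (cudnnConvolutionBackwardBias, cudnnConvolutionBackwardFilter, cudnnConvolutionBackwardData), each gated on req[...] != kNullOp and each mapping kAddTo onto beta = 1. As a sketch, the filter-gradient call alone, again a hypothetical standalone function with error checking omitted:

#include <cstddef>
#include <cudnn.h>

// dw = alpha * dLoss/dw + beta * dw, so beta = 1 accumulates (kAddTo).
void BackwardFilterOnce(cudnnHandle_t handle,
                        cudnnTensorDescriptor_t in_desc, const float* x,
                        cudnnTensorDescriptor_t out_desc, const float* dy,
                        cudnnConvolutionDescriptor_t conv_desc,
                        cudnnConvolutionBwdFilterAlgo_t algo,
                        void* workspace, std::size_t workspace_bytes,
                        bool add_to,  // req[conv::kWeight] == kAddTo
                        cudnnFilterDescriptor_t filter_desc, float* dw) {
  const float alpha = 1.0f;
  const float beta = add_to ? 1.0f : 0.0f;
  cudnnConvolutionBackwardFilter(handle, &alpha,
                                 in_desc, x,
                                 out_desc, dy,
                                 conv_desc, algo,
                                 workspace, workspace_bytes,
                                 &beta, filter_desc, dw);
}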
@@ -342,7 +418,10 @@ class CuDNNConvolutionOp {
     TShape wshape = in_shape[conv::kWeight];
     TShape oshape = out_shape[conv::kOut];
     TShape dstride, ostride;
+#if CUDNN_MAJOR <= 6
     wshape[0] /= param_.num_group;
+#endif
+
 #if CUDNN_MAJOR <= 5
     // As of cuDNN_v6, the unsuffixed version of cudnnSetConvolution2dDescriptor()
     // takes an additional 'computeType' parameter to set the precision of the
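
The guard above exists because the pre-cuDNN-7 path shrinks the filter shape to a single group's slice (wshape[0] /= num_group) and loops over groups, while with native groups the descriptor keeps the full output-channel count and cuDNN slices internally. A hypothetical snippet of what the cuDNN >= 7 filter descriptor ends up describing for a depthwise case (error checking omitted):

#include <cudnn.h>

// For a depthwise conv over `channels` maps: K = channels filters, each
// seeing C/groups = 1 input channel.
cudnnFilterDescriptor_t MakeDepthwiseFilterDesc(int channels, int kh, int kw) {
  cudnnFilterDescriptor_t filter_desc;
  cudnnCreateFilterDescriptor(&filter_desc);
  cudnnSetFilter4dDescriptor(filter_desc,
                             CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
                             /*k=*/channels, /*c=*/1, /*h=*/kh, /*w=*/kw);
  return filter_desc;
}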
@@ -464,9 +543,15 @@ class CuDNNConvolutionOp {
     CUDNN_CALL(cudnnSetConvolutionMathType(forward_conv_desc_, math_type));
     CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_, math_type));
     CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_w_, math_type));
+    CUDNN_CALL(cudnnSetConvolutionGroupCount(forward_conv_desc_, param_.num_group));
+    CUDNN_CALL(cudnnSetConvolutionGroupCount(back_conv_desc_, param_.num_group));
+    CUDNN_CALL(cudnnSetConvolutionGroupCount(back_conv_desc_w_, param_.num_group));
 #endif
 
+#if CUDNN_MAJOR <= 6
     dshape[1] /= param_.num_group;
     oshape[1] /= param_.num_group;
+#endif
     weight_offset_ = wshape.Size();
     data_offset_ = dstride[1] * dshape[1];
     out_offset_ = ostride[1] * oshape[1];
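
Similarly, the second #if above keeps the dshape[1] /= num_group and oshape[1] /= num_group shrinking only for cuDNN <= 6: the data_offset_ and out_offset_ values derived from these shapes set the pointer stride of the old per-group loop, whereas a single grouped call must span the full channel extent. Illustrative arithmetic only, with hypothetical sizes:

#include <cstdio>

int main() {
  const long C = 32, H = 56, W = 56, G = 32;  // depthwise: G == C
  const long plane = H * W;              // elements per channel plane
  const long sliced = plane * (C / G);   // cuDNN <= 6: one group per call
  const long whole = plane * C;          // cuDNN >= 7: one call, all groups
  std::printf("per-call extent: %ld vs %ld elements\n", sliced, whole);
  return 0;
}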
@@ -494,10 +579,17 @@ class CuDNNConvolutionOp {
 
     if (!param_.no_bias) {
       TShape bias = in_shape[conv::kBias];
+#if CUDNN_MAJOR >= 7
+      bias_offset_ = bias[0];
+      std::vector<int> bias_shape = {1,
+                                     static_cast<int>(bias[0]),
+                                     1, 1};
+#else
       bias_offset_ = bias[0] / param_.num_group;
       std::vector<int> bias_shape = {1,
                                      static_cast<int>(bias[0] / param_.num_group),
                                      1, 1};
+#endif
       std::vector<int> bias_stride = {static_cast<int>(bias_offset_), 1, 1, 1};
       if (param_.kernel.ndim() == 3) {
         bias_shape.push_back(1);
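
Finally, in the hunk above the bias descriptor now spans all output channels at once (1 x C x 1 x 1) under cuDNN >= 7, rather than a per-group slice (1 x C/G x 1 x 1), matching the single cudnnAddTensor and cudnnConvolutionBackwardBias calls. A hypothetical standalone version of that setup, error checking omitted:

#include <cudnn.h>

cudnnTensorDescriptor_t MakeBiasDesc(int channels) {
  cudnnTensorDescriptor_t bias_desc;
  cudnnCreateTensorDescriptor(&bias_desc);
  // Bias is broadcast over N, H, W; one value per output channel.
  cudnnSetTensor4dDescriptor(bias_desc,
                             CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
                             /*n=*/1, /*c=*/channels, /*h=*/1, /*w=*/1);
  return bias_desc;
}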
