
Commit

Merge pull request #1888 from antinucleon/cudnn_conv
AddTo support in CuDNN Conv & BatchNorm
antinucleon committed Apr 18, 2016
2 parents 0e7d516 + 6a78b03 commit d1c21b2
Showing 3 changed files with 8 additions and 9 deletions.
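Background for the hunks below: cuDNN's backward routines blend their result into the destination as dst = alpha * result + beta * dst, so passing beta = 0 overwrites the gradient buffer (kWriteTo) while beta = 1 accumulates into it (kAddTo). A minimal, self-contained sketch of that request-to-beta mapping follows; the SelectBeta helper and the simplified OpReqType enum are illustrative stand-ins, not code from this commit.

    #include <cassert>

    // Simplified stand-in for MXNet's OpReqType, for illustration only.
    enum OpReqType { kNullOp, kWriteTo, kWriteInplace, kAddTo };

    // cuDNN backward calls compute dst = alpha * result + beta * dst,
    // so the choice of beta decides between overwriting and accumulating.
    template <typename ScaleType>
    inline const ScaleType* SelectBeta(OpReqType req,
                                       const ScaleType* beta,        // points at 0: overwrite
                                       const ScaleType* beta_add) {  // points at 1: accumulate
      return req == kWriteTo ? beta : beta_add;
    }

    int main() {
      float beta = 0.0f, beta_add = 1.0f;
      assert(*SelectBeta(kWriteTo, &beta, &beta_add) == 0.0f);  // grad = dW
      assert(*SelectBeta(kAddTo, &beta, &beta_add) == 1.0f);    // grad += dW
      return 0;
    }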
src/operator/cudnn_batch_norm.cu: 6 changes (4 additions, 2 deletions)
@@ -154,15 +154,17 @@ class CuDNNBatchNormOp : public Operator {
         out_data[cudnnbatchnorm::kMean].get_with_shape<gpu, 1, real_t>(Shape1(shape_[1]), s);
       Tensor<gpu, 1> save_inv_var =
         out_data[cudnnbatchnorm::kInvVar].get_with_shape<gpu, 1, real_t>(Shape1(shape_[1]), s);
-      float a = 1.0f, b = 0.0f;
+      float a = 1.0f;
+      float b = 0.0f;
+      float b_add = 1.0f;
       CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream<gpu>::OwnHandle);
 #if CUDNN_VERSION >= 4007
       CHECK_EQ(cudnnBatchNormalizationBackward(s->dnn_handle_,
                                                CUDNN_BATCHNORM_SPATIAL,
                                                &a,
                                                &b,
                                                &a,
-                                               &b,
+                                               req[cudnnbatchnorm::kGamma] == kWriteTo ? &b: &b_add,
                                                io_desc_,
                                                x.dptr_,
                                                io_desc_,
src/operator/cudnn_convolution-inl.h: 9 changes (4 additions, 5 deletions)
@@ -110,8 +110,6 @@ class CuDNNConvolutionOp : public Operator {
     size_t expected = param_.no_bias == 0 ? 3 : 2;
     CHECK_EQ(out_grad.size(), 1);
     CHECK(in_data.size() == expected && in_grad.size() == expected);
-    // TODO(bing): think about how to support add to
-    CHECK_EQ(req[conv::kWeight], kWriteTo);
     Stream<gpu> *s = ctx.get_stream<gpu>();
     Tensor<gpu, 4, DType> grad = out_grad[conv::kOut].get<gpu, 4, DType>(s);
     Tensor<gpu, 4, DType> wmat = in_data[conv::kWeight].get<gpu, 4, DType>(s);
@@ -124,13 +122,14 @@
     for (uint32_t g = 0; g < param_.num_group; ++g) {
       typename DataType<DType>::ScaleType alpha = 1.0f;
       typename DataType<DType>::ScaleType beta = 0.0f;
+      typename DataType<DType>::ScaleType beta_add = 1.0f;
       if (!param_.no_bias) {
         Tensor<gpu, 1, DType> gbias = in_grad[conv::kBias].get<gpu, 1, DType>(s);
         CHECK_EQ(cudnnConvolutionBackwardBias(s->dnn_handle_,
                                               &alpha,
                                               out_desc_,
                                               grad.dptr_ + out_offset_ * g,
-                                              &beta,
+                                              req[conv::kBias] == kWriteTo ? &beta : &beta_add,
                                               bias_desc_,
                                               gbias.dptr_ + bias_offset_ * g),
                  CUDNN_STATUS_SUCCESS);
@@ -146,7 +145,7 @@
                                                  back_algo_w_,
                                                  workspace.dptr_,
                                                  backward_workspace_byte_,
-                                                 &beta,
+                                                 req[conv::kWeight] == kWriteTo? &beta : &beta_add,
                                                  filter_desc_,
                                                  gwmat.dptr_ + weight_offset_ * g), CUDNN_STATUS_SUCCESS);
 #elif CUDNN_MAJOR == 5
@@ -160,7 +159,7 @@
                                                  back_algo_w_,
                                                  workspace.dptr_,
                                                  backward_workspace_byte_,
-                                                 &beta,
+                                                 req[conv::kWeight] == kWriteTo? &beta : &beta_add,
                                                  filter_desc_,
                                                  gwmat.dptr_ + weight_offset_ * g), CUDNN_STATUS_SUCCESS);
 #endif
src/symbol/static_graph.cc: 2 changes (0 additions, 2 deletions)
@@ -380,8 +380,6 @@ StaticGraph::Node StaticGraph::CreateGradSumNode(
   if (grad_source.size() < inplace_sum_cap) {
     gsource = grad_source;
   } else {
-    LOG(INFO) << "Memory efficient gradient aggregation on..."
-              << " to disable, set MXNET_EXEC_INPLACE_GRAD_SUM_CAP to big number";
     for (size_t i = 1; i < grad_source.size(); ++i) {
       nodes[grad_source[i].source_id]
           .addto_index.push_back(grad_source[i].index);
