
Commit

Merge pull request #1888 from antinucleon/cudnn_conv
AddTo support in CuDNN Conv & BatchNorm
antinucleon committed Apr 18, 2016
2 parents 0e7d516 + 6a78b03 commit d1c21b2
Showing 3 changed files with 8 additions and 9 deletions.
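Background for the hunks below: cuDNN's backward routines blend their result into the destination as dst = alpha * result + beta * dst, so passing beta = 0 overwrites the gradient buffer (kWriteTo) while beta = 1 accumulates into it (kAddTo). A minimal, self-contained sketch of that request-to-beta mapping follows; the SelectBeta helper and the simplified OpReqType enum are illustrative stand-ins, not code from this commit.

    #include <cassert>

    // Simplified stand-in for MXNet's OpReqType, for illustration only.
    enum OpReqType { kNullOp, kWriteTo, kWriteInplace, kAddTo };

    // cuDNN backward calls compute dst = alpha * result + beta * dst,
    // so the choice of beta decides between overwriting and accumulating.
    template <typename ScaleType>
    inline const ScaleType* SelectBeta(OpReqType req,
                                       const ScaleType* beta,        // points at 0: overwrite
                                       const ScaleType* beta_add) {  // points at 1: accumulate
      return req == kWriteTo ? beta : beta_add;
    }

    int main() {
      float beta = 0.0f, beta_add = 1.0f;
      assert(*SelectBeta(kWriteTo, &beta, &beta_add) == 0.0f);  // grad = dW
      assert(*SelectBeta(kAddTo, &beta, &beta_add) == 1.0f);    // grad += dW
      return 0;
    }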
src/operator/cudnn_batch_norm.cu: 6 changes (4 additions, 2 deletions)
@@ -154,15 +154,17 @@ class CuDNNBatchNormOp : public Operator {
         out_data[cudnnbatchnorm::kMean].get_with_shape<gpu, 1, real_t>(Shape1(shape_[1]), s);
       Tensor<gpu, 1> save_inv_var =
         out_data[cudnnbatchnorm::kInvVar].get_with_shape<gpu, 1, real_t>(Shape1(shape_[1]), s);
-      float a = 1.0f, b = 0.0f;
+      float a = 1.0f;
+      float b = 0.0f;
+      float b_add = 1.0f;
       CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream<gpu>::OwnHandle);
 #if CUDNN_VERSION >= 4007
       CHECK_EQ(cudnnBatchNormalizationBackward(s->dnn_handle_,
                                                CUDNN_BATCHNORM_SPATIAL,
                                                &a,
                                                &b,
                                                &a,
-                                               &b,
+                                               req[cudnnbatchnorm::kGamma] == kWriteTo ? &b: &b_add,
                                                io_desc_,
                                                x.dptr_,
                                                io_desc_,
src/operator/cudnn_convolution-inl.h: 9 changes (4 additions, 5 deletions)
@@ -110,8 +110,6 @@ class CuDNNConvolutionOp : public Operator {
     size_t expected = param_.no_bias == 0 ? 3 : 2;
     CHECK_EQ(out_grad.size(), 1);
     CHECK(in_data.size() == expected && in_grad.size() == expected);
-    // TODO(bing): think about how to support add to
-    CHECK_EQ(req[conv::kWeight], kWriteTo);
     Stream<gpu> *s = ctx.get_stream<gpu>();
     Tensor<gpu, 4, DType> grad = out_grad[conv::kOut].get<gpu, 4, DType>(s);
     Tensor<gpu, 4, DType> wmat = in_data[conv::kWeight].get<gpu, 4, DType>(s);
@@ -124,13 +122,14 @@
     for (uint32_t g = 0; g < param_.num_group; ++g) {
       typename DataType<DType>::ScaleType alpha = 1.0f;
       typename DataType<DType>::ScaleType beta = 0.0f;
+      typename DataType<DType>::ScaleType beta_add = 1.0f;
       if (!param_.no_bias) {
         Tensor<gpu, 1, DType> gbias = in_grad[conv::kBias].get<gpu, 1, DType>(s);
         CHECK_EQ(cudnnConvolutionBackwardBias(s->dnn_handle_,
                                               &alpha,
                                               out_desc_,
                                               grad.dptr_ + out_offset_ * g,
-                                              &beta,
+                                              req[conv::kBias] == kWriteTo ? &beta : &beta_add,
                                               bias_desc_,
                                               gbias.dptr_ + bias_offset_ * g),
                  CUDNN_STATUS_SUCCESS);
@@ -146,7 +145,7 @@
                                                  back_algo_w_,
                                                  workspace.dptr_,
                                                  backward_workspace_byte_,
-                                                 &beta,
+                                                 req[conv::kWeight] == kWriteTo? &beta : &beta_add,
                                                  filter_desc_,
                                                  gwmat.dptr_ + weight_offset_ * g), CUDNN_STATUS_SUCCESS);
 #elif CUDNN_MAJOR == 5
@@ -160,7 +159,7 @@
                                                  back_algo_w_,
                                                  workspace.dptr_,
                                                  backward_workspace_byte_,
-                                                 &beta,
+                                                 req[conv::kWeight] == kWriteTo? &beta : &beta_add,
                                                  filter_desc_,
                                                  gwmat.dptr_ + weight_offset_ * g), CUDNN_STATUS_SUCCESS);
 #endif
src/symbol/static_graph.cc: 2 changes (0 additions, 2 deletions)
@@ -380,8 +380,6 @@ StaticGraph::Node StaticGraph::CreateGradSumNode(
   if (grad_source.size() < inplace_sum_cap) {
     gsource = grad_source;
   } else {
-    LOG(INFO) << "Memory efficient gradient aggregation on..."
-              << " to disable, set MXNET_EXEC_INPLACE_GRAD_SUM_CAP to big number";
     for (size_t i = 1; i < grad_source.size(); ++i) {
       nodes[grad_source[i].source_id]
           .addto_index.push_back(grad_source[i].index);
