fix: Fix Conv Layer Backward
IlievskiV authored and lmoneta committed Apr 8, 2018
1 parent a53e50c commit ffff9e4
Showing 4 changed files with 81 additions and 102 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -507,3 +507,7 @@ tags
/tutorials/tree/cernstaff.root
/tutorials/tree/staff.root
/tutorials/tree/basic.root
+
+# TMVA datasets
+/tmva/tmva/test/DNN/CNN/dataset/SingleElectronPt50_FEVTDEBUG_n250k_IMG_CROPS32.root
+/tmva/tmva/test/DNN/CNN/dataset/SinglePhotonPt50_FEVTDEBUG_n250k_IMG_CROPS32.root
67 changes: 28 additions & 39 deletions tmva/tmva/inc/TMVA/DNN/DeepNet.h
@@ -136,8 +136,8 @@ class TDeepNet {

   /*! Function for adding Recurrent Layer in the Deep Neural Network,
    * with given parameters */
-   TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize,
-                                                    size_t timeSteps, bool rememberState = false);
+   TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
+                                                    bool rememberState = false);

   /*! Function for adding Vanilla RNN when the layer is already created
    */
@@ -211,15 +211,15 @@ class TDeepNet {
   void Initialize();

   /*! Function that executes the entire forward pass in the network. */
-   void Forward(std::vector<Matrix_t> input, bool applyDropout = false);
+   void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);

   /*! Function for parallel forward in the vector of deep nets, where the master
    * net is the net calling this function. There is one batch for one deep net.*/
   void ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                        std::vector<TTensorBatch<Architecture_t>> &batches, bool applyDropout = false);

   /*! Function that executes the entire backward pass in the network. */
-   void Backward(std::vector<Matrix_t> input, const Matrix_t &groundTruth, const Matrix_t &weights);
+   void Backward(std::vector<Matrix_t> &input, const Matrix_t &groundTruth, const Matrix_t &weights);

   /* To train the Deep AutoEncoder network with required number of Corruption, Compression and Reconstruction
    * layers. */
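The two signature changes above are part of the fix, not formatting: `Forward` and `Backward` previously took the input tensor by value, so every call deep-copied one matrix per batch element before any computation started. A minimal sketch of the cost difference, using a stand-in matrix type rather than the TMVA `Matrix_t`:

```cpp
#include <vector>

// Stand-in for Architecture_t::Matrix_t; illustrative only.
struct Matrix {
   std::vector<double> data;
};

// Pass-by-value: the whole batch (one Matrix per image) is copied on every call.
void ForwardByValue(std::vector<Matrix> input) { (void)input; /* ... */ }

// Pass-by-reference: the net works on the caller's tensor in place, no copy.
void ForwardByRef(std::vector<Matrix> &input) { (void)input; /* ... */ }

int main()
{
   std::vector<Matrix> batch(32, Matrix{std::vector<double>(3 * 32 * 32, 0.0)});
   ForwardByValue(batch); // copies 32 * 3*32*32 doubles before any work starts
   ForwardByRef(batch);   // no copy
   return 0;
}
```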
@@ -338,8 +338,8 @@ template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                            size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
                                            EInitialization I, ERegularization R, Scalar_t weightDecay, bool isTraining)
-   : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fBatchDepth(batchDepth),
-     fBatchHeight(batchHeight), fBatchWidth(batchWidth), fInputWidth(inputWidth), fJ(J), fI(I), fR(R),
+   : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
+     fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fJ(J), fI(I), fR(R),
     fWeightDecay(weightDecay), fIsTraining(isTraining)
{
   // Nothing to do here.
@@ -491,12 +491,13 @@ void TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(TMaxPoolLayer<Architectu

//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
-TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize,
-                                                                                    size_t inputSize, size_t timeSteps,
+TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize, size_t inputSize,
+                                                                                    size_t timeSteps,
                                                                                     bool rememberState)
{
-   TBasicRNNLayer<Architecture_t> *basicRNNLayer = new TBasicRNNLayer<Architecture_t>(
-      this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, DNN::EActivationFunction::kTanh, fIsTraining, this->GetInitialization());
+   TBasicRNNLayer<Architecture_t> *basicRNNLayer =
+      new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState,
+                                         DNN::EActivationFunction::kTanh, fIsTraining, this->GetInitialization());
   fLayers.push_back(basicRNNLayer);
   return basicRNNLayer;
}
@@ -685,24 +686,23 @@ auto TDeepNet<Architecture_t, Layer_t>::Initialize() -> void
}

template <typename Architecture>
-auto debugTensor(const std::vector<typename Architecture::Matrix_t> &A, const std::string name = "tensor")
-   -> void
+auto debugTensor(const std::vector<typename Architecture::Matrix_t> &A, const std::string name = "tensor") -> void
{
   std::cout << name << "\n";
   for (size_t l = 0; l < A.size(); ++l) {
      for (size_t i = 0; i < A[l].GetNrows(); ++i) {
         for (size_t j = 0; j < A[l].GetNcols(); ++j) {
            std::cout << A[l](i, j) << " ";
         }
         std::cout << "\n";
      }
      std::cout << "********\n";
   }
}

//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
-auto TDeepNet<Architecture_t, Layer_t>::Forward(std::vector<Matrix_t> input, bool applyDropout) -> void
+auto TDeepNet<Architecture_t, Layer_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
{
   fLayers.front()->Forward(input, applyDropout);

@@ -717,6 +717,7 @@ auto TDeepNet<Architecture_t, Layer_t>::ParallelForward(std::vector<TDeepNet<Arc
                                                         std::vector<TTensorBatch<Architecture_t>> &batches,
                                                         bool applyDropout) -> void
{
+   std::cout << "Parallel Forward" << std::endl;
   size_t depth = this->GetDepth();

   // The first layer of each deep net
@@ -855,9 +856,10 @@ auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, s

//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
-auto TDeepNet<Architecture_t, Layer_t>::Backward(std::vector<Matrix_t> input, const Matrix_t &groundTruth,
+auto TDeepNet<Architecture_t, Layer_t>::Backward(std::vector<Matrix_t> &input, const Matrix_t &groundTruth,
                                                  const Matrix_t &weights) -> void
{
+   std::cout << "Backward" << std::endl;
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   // Last layer should be dense layer
@@ -867,15 +869,11 @@ auto TDeepNet<Architecture_t, Layer_t>::Backward(std::vector<Matrix_t> input, co
      std::vector<Matrix_t> &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
      std::vector<Matrix_t> &activations_backward = fLayers[i - 1]->GetOutput();
      fLayers[i]->Backward(activation_gradient_backward, activations_backward, inp1, inp2);
-      //debugTensor<Architecture_t>(activation_gradient_backward, "act grad backward after back of dense");
+      // debugTensor<Architecture_t>(activation_gradient_backward, "act grad backward after back of dense");
   }

-   std::vector<Matrix_t> gradient_input;
-   for (size_t i = 0; i < input.size(); i++) {
-      gradient_input.emplace_back(input[i].GetNrows(), input[i].GetNcols());
-   }
-
-   fLayers[0]->Backward(gradient_input, input, inp1, inp2);
+   std::vector<Matrix_t> dummy;
+   fLayers[0]->Backward(dummy, input, inp1, inp2);
}
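The rewritten tail of `TDeepNet::Backward` is a real change: the first layer has no predecessor, so the activation gradients it would compute have no consumer. The old code nevertheless allocated a full-size `gradient_input` tensor (one matrix per batch element) only for it to be discarded; the new code passes an empty `dummy` vector, which the early return added to `CalculateConvActivationGradients` further down makes safe. A rough sketch of what the removed allocation cost, with illustrative sizes:

```cpp
#include <cstddef>
#include <vector>

// Minimal stand-in for the TMVA matrix; illustrative only.
struct Matrix {
   Matrix(std::size_t r, std::size_t c) : rows(r), cols(c), data(r * c, 0.0) {}
   std::size_t rows, cols;
   std::vector<double> data;
};

int main()
{
   std::size_t batchSize = 32, depth = 3, pixels = 32 * 32;

   // Old: one full-size matrix per batch element, written once and never read.
   std::vector<Matrix> gradient_input;
   for (std::size_t i = 0; i < batchSize; i++) gradient_input.emplace_back(depth, pixels);

   // New: an empty vector signals "first layer, do not compute input gradients".
   std::vector<Matrix> dummy;
   return static_cast<int>(dummy.size()); // 0
}
```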

//______________________________________________________________________________
Expand All @@ -884,6 +882,7 @@ auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Ar
std::vector<TTensorBatch<Architecture_t>> &batches,
Scalar_t learningRate) -> void
{
std::cout << "Parallel Backward" << std::endl;
std::vector<Matrix_t> inp1;
std::vector<Matrix_t> inp2;
size_t depth = this->GetDepth();
@@ -904,10 +903,6 @@ auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Ar
   }

   std::vector<Matrix_t> dummy;
-   for (size_t i = 0; i < this->GetBatchSize(); i++) {
-      // Should we determine the dimensions?
-      dummy.emplace_back(0, 0);
-   }

   // First layer of each deep net
   for (size_t i = 0; i < nets.size(); i++) {
@@ -935,6 +930,7 @@ auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDe
                                                                  std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                  Scalar_t learningRate, Scalar_t momentum) -> void
{
+   std::cout << "Parallel Backward Momentum" << std::endl;
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();
@@ -964,10 +960,6 @@ auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDe
   }

   std::vector<Matrix_t> dummy;
-   for (size_t i = 0; i < this->GetBatchSize(); i++) {
-      // Should we determine the dimensions?
-      dummy.emplace_back(0, 0);
-   }

   // First layer of each deep net
   Layer_t *masterFirstLayer = this->GetLayerAt(0);
@@ -1002,6 +994,7 @@ auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDe
                                                                  std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                  Scalar_t learningRate, Scalar_t momentum) -> void
{
+   std::cout << "Parallel Backward Nestorov" << std::endl;
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();
@@ -1024,10 +1017,6 @@ auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDe
   }

   std::vector<Matrix_t> dummy;
-   for (size_t i = 0; i < this->GetBatchSize(); i++) {
-      // Should we determine the dimensions?
-      dummy.emplace_back(0, 0);
-   }

   // First layer of each deep net
   for (size_t i = 0; i < nets.size(); i++) {
102 changes: 45 additions & 57 deletions tmva/tmva/src/DNN/Architectures/Cpu/Propagation.cxx
@@ -18,76 +18,63 @@
#include "TMVA/DNN/Architectures/Cpu.h"
#include "TMVA/DNN/Architectures/Cpu/Blas.h"

-namespace TMVA
-{
-namespace DNN
-{
+namespace TMVA {
+namespace DNN {

-template<typename AFloat>
-void TCpu<AFloat>::MultiplyTranspose(TCpuMatrix<AFloat> &output,
-                                     const TCpuMatrix<AFloat> &input,
+template <typename AFloat>
+void TCpu<AFloat>::MultiplyTranspose(TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &input,
                                      const TCpuMatrix<AFloat> &Weights)
{
-   int m = (int) input.GetNrows();
-   int k = (int) input.GetNcols();
-   int n = (int) Weights.GetNrows();
+   int m = (int)input.GetNrows();
+   int k = (int)input.GetNcols();
+   int n = (int)Weights.GetNrows();

   char transa = 'N';
   char transb = 'T';

   AFloat alpha = 1.0;
   AFloat beta = 0.0;

   const AFloat *A = input.GetRawDataPointer();
   const AFloat *B = Weights.GetRawDataPointer();
   AFloat *C = output.GetRawDataPointer();

-   ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
-                           A, &m, B, &n, &beta, C, &m);
+   ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha, A, &m, B, &n, &beta, C, &m);
}

-template<typename AFloat>
-void TCpu<AFloat>::AddRowWise(
-   TCpuMatrix<AFloat> &output,
-   const TCpuMatrix<AFloat> &biases)
+template <typename AFloat>
+void TCpu<AFloat>::AddRowWise(TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &biases)
{
-   int m = (int) output.GetNrows();
-   int n = (int) output.GetNcols();
+   int m = (int)output.GetNrows();
+   int n = (int)output.GetNcols();

   int inc = 1.0;
   AFloat alpha = 1.0;

-   AFloat * A = output.GetRawDataPointer();
-   const AFloat * x = TCpuMatrix<AFloat>::GetOnePointer();
-   const AFloat * y = biases.GetRawDataPointer();
+   AFloat *A = output.GetRawDataPointer();
+   const AFloat *x = TCpuMatrix<AFloat>::GetOnePointer();
+   const AFloat *y = biases.GetRawDataPointer();

   ::TMVA::DNN::Blas::Ger(&m, &n, &alpha, x, &inc, y, &inc, A, &m);
}

-template<typename AFloat>
-void TCpu<AFloat>::Backward(
-   TCpuMatrix<AFloat> & activationGradientsBackward,
-   TCpuMatrix<AFloat> & weightGradients,
-   TCpuMatrix<AFloat> & biasGradients,
-   TCpuMatrix<AFloat> & df,
-   const TCpuMatrix<AFloat> & activationGradients,
-   const TCpuMatrix<AFloat> & weights,
-   const TCpuMatrix<AFloat> & activationsBackward)
+template <typename AFloat>
+void TCpu<AFloat>::Backward(TCpuMatrix<AFloat> &activationGradientsBackward, TCpuMatrix<AFloat> &weightGradients,
+                            TCpuMatrix<AFloat> &biasGradients, TCpuMatrix<AFloat> &df,
+                            const TCpuMatrix<AFloat> &activationGradients, const TCpuMatrix<AFloat> &weights,
+                            const TCpuMatrix<AFloat> &activationsBackward)
{
   // Compute element-wise product.
   Hadamard(df, activationGradients);

   // Activation gradients.
-   if (activationGradientsBackward.GetNElements() > 0)
-      Multiply(activationGradientsBackward, df, weights);
+   if (activationGradientsBackward.GetNElements() > 0) Multiply(activationGradientsBackward, df, weights);

   // Weight gradients.
-   if (weightGradients.GetNElements() > 0)
-      TransposeMultiply(weightGradients, df, activationsBackward);
+   if (weightGradients.GetNElements() > 0) TransposeMultiply(weightGradients, df, activationsBackward);

   // Bias gradients.
-   if (biasGradients.GetNElements() > 0)
-      SumColumns(biasGradients, df);
+   if (biasGradients.GetNElements() > 0) SumColumns(biasGradients, df);
}

//____________________________________________________________________________
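For orientation, `TCpu::Backward` above is the dense-layer backward pass: `Hadamard` forms df = f'(z) ⊙ dY in place, then the three guarded calls compute dX = df · W, dW = dfᵀ · X_prev, and db as the column sums of df. A naive loop version of the same algebra follows; this is not the TMVA implementation (which delegates to BLAS), just the products those calls realize, with dimensions matching `MultiplyTranspose` above, where the layer output is input · Weightsᵀ:

```cpp
#include <cstddef>
#include <vector>

using Mat = std::vector<std::vector<double>>;

// df: m x n (batch x outputs), W: n x k (outputs x inputs), X: m x k (previous activations).
// Outputs must be pre-sized and zeroed: dX is m x k, dW is n x k, db has n entries.
void DenseBackward(Mat &dX, Mat &dW, std::vector<double> &db, const Mat &df, const Mat &W, const Mat &X)
{
   std::size_t m = df.size(), n = df[0].size(), k = W[0].size();
   for (std::size_t i = 0; i < m; ++i) // dX = df * W
      for (std::size_t c = 0; c < k; ++c)
         for (std::size_t j = 0; j < n; ++j) dX[i][c] += df[i][j] * W[j][c];
   for (std::size_t j = 0; j < n; ++j) // dW = df^T * X
      for (std::size_t c = 0; c < k; ++c)
         for (std::size_t i = 0; i < m; ++i) dW[j][c] += df[i][j] * X[i][c];
   for (std::size_t j = 0; j < n; ++j) // db = column sums of df
      for (std::size_t i = 0; i < m; ++i) db[j] += df[i][j];
}
```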
@@ -96,7 +83,7 @@ void TCpu<AFloat>::Im2col(TCpuMatrix<AFloat> &A, TCpuMatrix<AFloat> &B, size_t i
                          size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
                          size_t zeroPaddingHeight, size_t zeroPaddingWidth)
{
   // image boudaries
   int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
   int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
   size_t currLocalView = 0;
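As a quick sanity check of the boundary formula in this hunk (the numbers are illustrative, not from the commit): a 32-row image with one row of zero padding and a filter of height 3 gives `32 + 1 - (3 - 1) / 2 - 1 = 31`, i.e. one row of padding is consumed by the filter's half-height.

```cpp
// Sanity check of the Im2col boundary computation shown above.
// Values are illustrative; the integer division matches the source.
#include <cassert>

int main()
{
   int imgHeight = 32, zeroPaddingHeight = 1, fltHeight = 3;
   int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
   assert(imgHeightBound == 31); // last admissible row index per the formula above
   return imgHeightBound - 31;   // 0
}
```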
@@ -179,8 +166,8 @@ void TCpu<AFloat>::ConvLayerBackward(std::vector<TCpuMatrix<AFloat>> &activation
   }

   // Calculate the activation gradients of the previous layer
-   CalculateConvActivationGradients(activationGradientsBackward, df, weights, batchSize, inputHeight, inputWidth,
-                                    depth, height, width, filterDepth, filterHeight, filterWidth);
+   CalculateConvActivationGradients(activationGradientsBackward, df, weights, batchSize, inputHeight, inputWidth, depth,
+                                    height, width, filterDepth, filterHeight, filterWidth);

   // Calculate the weight gradients
   CalculateConvWeightGradients(weightGradients, df, activationsBackward, batchSize, inputHeight, inputWidth, depth,
@@ -199,6 +186,8 @@ void TCpu<AFloat>::CalculateConvActivationGradients(std::vector<TCpuMatrix<AFloa
                                                    size_t width, size_t filterDepth, size_t filterHeight,
                                                    size_t filterWidth)
{
+   if (activationGradientsBackward.size() == 0) return;
+
   // Transform the weights
   TCpuMatrix<AFloat> rotWeights(filterDepth, depth * filterHeight * filterWidth);
   RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.GetNrows());
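These two added lines are the callee side of the `dummy` change in `DeepNet.h`: an empty gradient tensor now means "no previous layer, skip the work" instead of requiring the caller to allocate placeholder matrices. A minimal sketch of the convention, with hypothetical types rather than the real TMVA signature:

```cpp
#include <vector>

// Hypothetical reduced signature; the real function also takes weights and sizes.
template <typename Matrix>
void CalcActivationGradients(std::vector<Matrix> &activationGradientsBackward)
{
   if (activationGradientsBackward.size() == 0) return; // first layer: no consumer
   // ... rotate weights, Im2col the incoming gradients, multiply ...
}

int main()
{
   std::vector<std::vector<float>> none; // Matrix = std::vector<float>, illustrative
   CalcActivationGradients(none);        // returns immediately, no allocation needed
   return 0;
}
```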
@@ -234,8 +223,8 @@ void TCpu<AFloat>::CalculateConvWeightGradients(TCpuMatrix<AFloat> &weightGradie
                                                size_t filterWidth, size_t nLocalViews)
{
   // reinitialize the weight gradients to 0
-   for (size_t i = 0; i < depth; i++) {
-      for (size_t j = 0; j < nLocalViews; j++) {
+   for (size_t i = 0; i < weightGradients.GetNrows(); i++) {
+      for (size_t j = 0; j < weightGradients.GetNcols(); j++) {
         weightGradients(i, j) = 0;
      }
   }
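The zero-initialization fix above matters because `weightGradients` is shaped like the weights, `depth x (filterDepth * filterHeight * filterWidth)`, while `depth x nLocalViews` describes the layer's output plane. When `nLocalViews` exceeded the number of weight columns, the old loops indexed past the end of each row; when it was smaller, part of the gradient matrix kept stale values between batches. Iterating over `GetNrows()`/`GetNcols()` zeroes exactly what is stored. Illustrative numbers (not from the commit):

```cpp
#include <cstddef>
#include <iostream>

int main()
{
   // A hypothetical conv layer: 8 filters of shape 3x5x5 over a 28x28 output plane.
   std::size_t depth = 8, filterDepth = 3, filterHeight = 5, filterWidth = 5;
   std::size_t weightCols = filterDepth * filterHeight * filterWidth; // 75
   std::size_t nLocalViews = 28 * 28;                                 // 784

   std::cout << "columns actually stored per row:     " << weightCols << "\n"
             << "columns the old loop tried to zero:  " << nLocalViews << "\n";
   return 0;
}
```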
@@ -266,11 +255,11 @@ void TCpu<AFloat>::CalculateConvWeightGradients(TCpuMatrix<AFloat> &weightGradie
      Im2col(rowDeltaTr, rowDelta, height, width, inputHeight, inputWidth, tempStrideRows, tempStrideCols,
             tempZeroPaddingHeight, tempZeroPaddingWidth);

      TransposeMultiply(res, activationsBackward[i], rowDeltaTr);

      for (size_t k = 0; k < filterDepth; k++) {
         for (size_t l = 0; l < filterHeight * filterWidth; l++) {
-            weightGradients(j, k * filterDepth + l) += res(k, (tempNLocalViews - 1) - l);
+            weightGradients(j, k * (filterHeight * filterWidth) + l) += res(k, (tempNLocalViews - 1) - l);
         }
      }
   }
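This one-line change is the bug the commit title refers to. Each row of `weightGradients` stores one filter's weights slice-major: depth-slice `k` occupies the `filterHeight * filterWidth` consecutive columns starting at `k * filterHeight * filterWidth`. The old expression strode by `filterDepth` instead, so whenever `filterDepth != filterHeight * filterWidth` the accumulated gradients landed in the wrong, overlapping columns. A worked example with illustrative sizes:

```cpp
#include <cstddef>
#include <iostream>

int main()
{
   // Hypothetical 3x3 filters with filterDepth = 3 (e.g. an RGB input).
   std::size_t filterDepth = 3, filterHeight = 3, filterWidth = 3;
   std::size_t k = 2, l = 4; // depth-slice 2, weight 4 inside the 3x3 patch

   std::size_t oldIdx = k * filterDepth + l;                  // 10: falls in slice 1's columns (9..17)
   std::size_t newIdx = k * (filterHeight * filterWidth) + l; // 22: the correct slot in slice 2 (18..26)

   std::cout << "old column: " << oldIdx << ", fixed column: " << newIdx << "\n";
   return 0;
}
```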
@@ -402,11 +391,10 @@ void TCpu<AReal>::Rearrange(std::vector<TCpuMatrix<AReal>> &out, const std::vect
   size_t B = out.size();
   size_t T = out[0].GetNrows();
   size_t D = out[0].GetNcols();
-   if ((T != in.size()) || (B != in[0].GetNrows())
-       || (D != in[0].GetNcols())) {
+   if ((T != in.size()) || (B != in[0].GetNrows()) || (D != in[0].GetNcols())) {
      std::cout << "Incompatible Dimensions\n"
-                << in.size() << "x" << in[0].GetNrows() << "x" << in[0].GetNcols()
-                << " --> " << B << "x" << T << "x" << D << "\n";
+                << in.size() << "x" << in[0].GetNrows() << "x" << in[0].GetNcols() << " --> " << B << "x" << T << "x"
+                << D << "\n";
      return;
   }
   for (size_t i = 0; i < B; ++i) {
